diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..aa78a21 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=options_db +DB_USER=quant_user +DB_PASSWORD=change_me +PIPELINE_SYMBOLS=SPY + +# For scripts/setup_postgres.py when creating role/database: +# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE. +POSTGRES_ADMIN_USER=postgres +POSTGRES_ADMIN_PASSWORD=postgres +POSTGRES_ADMIN_HOST=localhost +POSTGRES_ADMIN_PORT=5432 +POSTGRES_ADMIN_DB=postgres diff --git a/.gitignore b/.gitignore index d13a870..4b75572 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,28 @@ __pycache__/ *.pyo .ipynb_checkpoints/ + +# Built Python extension dropped next to qengine/__init__.py for local dev +/qengine/*.so +/qengine/*.dylib +/qengine/__pycache__/ + +/skbuild-build/ + +/build/ +/.idea/ +**/__pycache__/ +/docs/html/ +/docs/latex/ + +# Local reference tree (optional clone) +/CPP-design-pattern-derivatives-pricing/ + +# Local environment and secrets +.env +.env.* +!.env.example + +# Local tooling caches +/.pycache/ +/.mplconfig/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ff7f7d..85e20b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,31 +4,53 @@ project(QuantEngine) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS "-O3 -march=native") +option(BUILD_TESTING "Build GoogleTest target and tests" ON) + +set(PYBIND11_FINDPYTHON ON) +find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) find_package(Eigen3 REQUIRED) +find_package(pybind11 CONFIG REQUIRED) #find_package(PostgreSQL REQUIRED) #find_package(PkgConfig REQUIRED) #pkg_check_modules(PQXX REQUIRED IMPORTED_TARGET libpqxx) -add_subdirectory(src) +add_subdirectory(cpp) -# Testing -enable_testing() +find_package(Doxygen OPTIONAL_COMPONENTS dot) +if(DOXYGEN_FOUND) + add_custom_target( + docs + COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/Doxyfile + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + COMMENT "Generating API documentation (HTML in docs/html)" + VERBATIM) +endif() -include(FetchContent) +install(FILES "${CMAKE_SOURCE_DIR}/qengine/__init__.py" DESTINATION qengine) +install(TARGETS qengine_cpp + LIBRARY DESTINATION qengine + RUNTIME DESTINATION qengine) -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip - DOWNLOAD_EXTRACT_TIMESTAMP TRUE -) +if(BUILD_TESTING) + enable_testing() -FetchContent_MakeAvailable(googletest) + include(FetchContent) -add_executable(qengine_tests - tests/test_black_scholes.cpp - tests/stubs/FlatYieldCurve.cpp - tests/stubs/FlatVolatilitySurface.cpp) + FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip + DOWNLOAD_EXTRACT_TIMESTAMP TRUE + ) -target_link_libraries(qengine_tests qengine GTest::gtest_main) -include(GoogleTest) -gtest_discover_tests(qengine_tests) + FetchContent_MakeAvailable(googletest) + + add_executable(qengine_tests + tests/test_black_scholes.cpp + tests/stubs/FlatYieldCurve.cpp + tests/stubs/FlatVolatilitySurface.cpp) + + target_include_directories(qengine_tests PRIVATE ${CMAKE_SOURCE_DIR}/tests) + target_link_libraries(qengine_tests qengine_core GTest::gtest_main) + include(GoogleTest) + gtest_discover_tests(qengine_tests) +endif() diff --git a/README.md b/README.md index d1bb550..896381a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,79 @@ -# pricing +# option_pricing -Monte Carlo pricing of European options under Black–Scholes +C++/Python quantitative finance engine for option pricing, implied-volatility analysis, and market-data ingestion. -### Project structure +## What is included + +- `cpp/`: core C++ pricing library (Monte Carlo + Black-Scholes closed form), DB ingestion hooks, and pybind bindings. +- `qengine/`: Python package exposing the native extension (`import qengine`). +- `src/ImpliedVolatility/`: SVI calibration and implied-volatility tooling. +- `src/data/`: data ingestion, SQL schema, and analytics helpers. +- `tests/`: C++ unit tests (GoogleTest). +- `scripts/`: operational scripts, including PostgreSQL setup. +- `docs/`: Doxygen configuration and generated API docs (ignored in git for publication). + +## Quickstart + +### 1) Clone and create a Python environment + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install --upgrade pip +pip install -e . +pip install pandas yfinance sqlalchemy psycopg2-binary matplotlib scipy +``` + +### 2) Configure environment variables + +```bash +cp .env.example .env +``` + +Then edit `.env` with your local database credentials. + +### 3) Create database and schema + +Use the idempotent setup script: + +```bash +source .env +python scripts/setup_postgres.py +``` + +This script creates/updates: +- database role (`DB_USER`) +- database (`DB_NAME`) +- tables/indexes from `src/data/sql/schema.sql` + +### 4) Build C++ extension and run tests + +```bash +cmake -S . -B build +cmake --build build -j +ctest --test-dir build --output-on-failure +``` + +### 5) Run Yahoo options ingestion + +```bash +source .env +python src/data/ingestion/ingest_yahoo_options.py +``` + +`PIPELINE_SYMBOLS` in `.env` controls which symbols are ingested (comma-separated, e.g. `SPY,AAPL,QQQ`). + +## Security and publication notes + +- No credentials are stored in source code. +- `.env` files are git-ignored; only `.env.example` is committed. +- Before publishing, rotate any credentials that were ever committed in the past. +- Prefer least-privilege DB users for runtime ingestion jobs. + +## Generating C++ API docs + +```bash +cmake --build build --target docs +``` + +Generated output goes to `docs/html/` and is ignored in version control. diff --git a/src/data/config/__init__.py b/__init__.py similarity index 100% rename from src/data/config/__init__.py rename to __init__.py diff --git a/cpp/BSWrapper.cpp b/cpp/BSWrapper.cpp new file mode 100644 index 0000000..affc616 --- /dev/null +++ b/cpp/BSWrapper.cpp @@ -0,0 +1,49 @@ +// +// Created by David Doebel on 27.03.2026. +// + +#include "BSWrapper.hpp" + +#include "BlackScholesClosedFormEngine.hpp" +#include "BlackScholesProcess.hpp" +#include "Instrument.hpp" +#include "Option.hpp" +#include "FlatVolatilitySurface.hpp" +#include "FlatYieldCurve.hpp" +#include +#include + +class FlatYieldCurve; + +double BSWrapper::bs_price_wrapper(double S, double K, double T, double r, double sigma, + bool is_call) { + std::shared_ptr flat_curve = std::make_shared(r); + auto flat_vol_surface = std::make_shared(sigma); + MarketData data(S,flat_curve, flat_vol_surface); + std::unique_ptr process = std::make_unique(data); + std::unique_ptr pricing_engine = + std::make_unique(std::move(process)); + std::unique_ptr payoff; + if (is_call) + payoff = std::make_unique(K); + else payoff = std::make_unique(K); + EuropeanExercise exercise(T); + VanillaOption option(T,std::make_unique(exercise), + std::move(payoff),std::move(pricing_engine)); + return option.price(); +} + +std::vector BSWrapper::batch_bs_price_wrapper(const std::vector &S, const std::vector &K, + const std::vector &T, const std::vector &r, const std::vector &sigma, + const std::vector &is_call) { + assert(K.size() == S.size() && K.size() == T.size() && K.size() == r.size() && K.size() == + sigma.size() && K.size() == is_call.size()); + std::size_t n = K.size(); + std::vector result(n); + for (std::size_t i = 0; i < n; ++i) { + result[i] = bs_price_wrapper(S[i], K[i], T[i], r[i], sigma[i], is_call[i]); + if (i % 100 == 0) + std::cout << "i = " << i << " result = " << result[i] << std::endl; // ( i % 1000 == 0) + } + return result; +} diff --git a/cpp/BSWrapper.hpp b/cpp/BSWrapper.hpp new file mode 100644 index 0000000..0ab73d6 --- /dev/null +++ b/cpp/BSWrapper.hpp @@ -0,0 +1,24 @@ +/** + * @file BSWrapper.hpp + * @brief Black–Scholes vanilla price (closed form; used from Python / implied vol). + */ + +#ifndef QUANTENGINE_BSWRAPPER_HPP +#define QUANTENGINE_BSWRAPPER_HPP +#include + +/** + * @brief Static helpers wrapping scalar and batch pricing. + */ +class BSWrapper { +public: + BSWrapper() = delete; + static double bs_price_wrapper(double S, double K, double T, double r, double sigma, bool is_call); + static std::vector batch_bs_price_wrapper(const std::vector& S, const std::vector& K, + const std::vector& T, const std::vector& r, const std::vector& sigma, + const std::vector& is_call); + +}; + + +#endif //QUANTENGINE_BSWRAPPER_HPP \ No newline at end of file diff --git a/cpp/BlackScholesClosedFormEngine.cpp b/cpp/BlackScholesClosedFormEngine.cpp new file mode 100644 index 0000000..d2060d2 --- /dev/null +++ b/cpp/BlackScholesClosedFormEngine.cpp @@ -0,0 +1,69 @@ +/** + * @file BlackScholesClosedFormEngine.cpp + * @brief Black–Scholes closed-form pricing (calls, puts, cash-or-nothing digital). + */ + +#include "BlackScholesClosedFormEngine.hpp" +#include "Instrument.hpp" +#include "Payoff.hpp" +#include +#include + +namespace { + +double norm_cdf(double x) { + return 0.5 * (1.0 + std::erf(x / std::sqrt(2.0))); +} + +} // namespace + +double BlackScholesClosedFormEngine::calculate(const Instrument &instrument) const { + if (instrument.exerciseType() != Exercise::Type::European) { + throw std::invalid_argument("BlackScholesClosedFormEngine: European exercise only"); + } + + const double T = instrument.maturity(); + const MarketData &md = process_->data(); + const double S = md.spot(); + double K = instrument.payoff().strike(); + const PayoffKind pk = instrument.payoff().kind(); + + if (T <= 0.0) { + return instrument.payoff()(S); + } + + const double r = md.yield_curve().zeroRate(T); + const double sigma = md.volatility_surface().sigma(K, T); + if (sigma <= 0.0) { + throw std::invalid_argument("BlackScholesClosedFormEngine: volatility must be positive"); + } + + const double disc = md.yield_curve().discount(T); + const double sqrtT = std::sqrt(T); + const double sig_sqrtT = sigma * sqrtT; + + if (sig_sqrtT < 1e-15) { + const double forward = S * std::exp(r * T); + switch (pk) { + case PayoffKind::Call: + return disc * std::max(0.0, forward - K); + case PayoffKind::Put: + return disc * std::max(0.0, K - forward); + case PayoffKind::Digital: + return (forward > K) ? disc : 0.0; + } + } + + const double d1 = (std::log(S / K) + (r + 0.5 * sigma * sigma) * T) / sig_sqrtT; + const double d2 = d1 - sig_sqrtT; + + switch (pk) { + case PayoffKind::Call: + return S * norm_cdf(d1) - K * disc * norm_cdf(d2); + case PayoffKind::Put: + return K * disc * norm_cdf(-d2) - S * norm_cdf(-d1); + case PayoffKind::Digital: + return disc * norm_cdf(d2); + } + throw std::logic_error("BlackScholesClosedFormEngine: unhandled PayoffKind"); +} diff --git a/cpp/BlackScholesClosedFormEngine.hpp b/cpp/BlackScholesClosedFormEngine.hpp new file mode 100644 index 0000000..769b852 --- /dev/null +++ b/cpp/BlackScholesClosedFormEngine.hpp @@ -0,0 +1,22 @@ +/** + * @file BlackScholesClosedFormEngine.hpp + * @brief Risk-neutral Black–Scholes formula for European payoffs under GBM (flat or surface inputs via @ref MarketData). + */ + +#ifndef QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP +#define QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP + +#include "PricingEngine.hpp" + +/** + * @brief Analytic European vanilla / digital prices using @f$r@f$ and @f$\sigma(K,T)@f$ from the embedded process’s @ref MarketData. + */ +class BlackScholesClosedFormEngine : public PricingEngine { +public: + explicit BlackScholesClosedFormEngine(std::unique_ptr process) + : PricingEngine(std::move(process)) {} + + double calculate(const Instrument &instrument) const override; +}; + +#endif // QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP diff --git a/src/BlackScholesProcess.cpp b/cpp/BlackScholesProcess.cpp similarity index 85% rename from src/BlackScholesProcess.cpp rename to cpp/BlackScholesProcess.cpp index 127a715..43681d9 100644 --- a/src/BlackScholesProcess.cpp +++ b/cpp/BlackScholesProcess.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file BlackScholesProcess.cpp + * @brief Black–Scholes GBM drift, diffusion, and step. + */ #include "BlackScholesProcess.hpp" diff --git a/src/BlackScholesProcess.hpp b/cpp/BlackScholesProcess.hpp similarity index 70% rename from src/BlackScholesProcess.hpp rename to cpp/BlackScholesProcess.hpp index 320f34f..64a67a9 100644 --- a/src/BlackScholesProcess.hpp +++ b/cpp/BlackScholesProcess.hpp @@ -1,12 +1,15 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file BlackScholesProcess.hpp + * @brief Geometric Brownian motion with yield and volatility surfaces. + */ #ifndef QUANTENGINE_BLACKSCHOLESPROCESS_HPP #define QUANTENGINE_BLACKSCHOLESPROCESS_HPP #include "StochasticProcess.hpp" - +/** + * @brief GBM: drift @f$r_t S@f$, diffusion @f$\sigma(S,t) S@f$, exact log-step. + */ class BlackScholesProcess : public StochasticProcess{ public: explicit BlackScholesProcess(MarketData data) : StochasticProcess(std::move(data)){} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 0000000..eef55bf --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,65 @@ +add_library(qengine_core + Instrument.cpp + Instrument.hpp + Payoff.cpp + Payoff.hpp + Option.cpp + Option.hpp + PricingEngine.cpp + PricingEngine.hpp + MonteCarloEngine.cpp + MonteCarloEngine.hpp + StochasticProcess.cpp + StochasticProcess.hpp + Exercise.cpp + Exercise.hpp + MarketData.cpp + MarketData.hpp + YieldCurve.cpp + YieldCurve.hpp + VolatilitySurface.cpp + VolatilitySurface.hpp + RandomGenerator.cpp + RandomGenerator.hpp + Statistics.cpp + Statistics.hpp + BlackScholesClosedFormEngine.cpp + BlackScholesClosedFormEngine.hpp + BlackScholesProcess.cpp + BlackScholesProcess.hpp + DBIngest.cpp + DBIngest.hpp + BSWrapper.cpp + BSWrapper.hpp + NewtonSolver.cpp + NewtonSolver.hpp +) + +target_include_directories(qengine_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(qengine_core PRIVATE + /opt/homebrew/include +) + +find_library(PQXX_LIB pqxx PATHS /opt/homebrew/lib /usr/local/lib /usr/lib) +find_library(PQ_LIB pq PATHS /opt/homebrew/opt/libpq/lib /opt/homebrew/lib /usr/local/lib /usr/lib) +if(NOT PQXX_LIB OR NOT PQ_LIB) + message(FATAL_ERROR "Could not find libpqxx and/or libpq (install via Homebrew: brew install libpqxx libpq)") +endif() + +target_link_libraries(qengine_core Eigen3::Eigen) +target_link_libraries(qengine_core ${PQXX_LIB} ${PQ_LIB}) + +# Python import path: package qengine, extension submodule qengine (file qengine/qengine*.so) +pybind11_add_module(qengine_cpp MODULE ImpliedVolatility/Pybind.cpp) +set_target_properties(qengine_cpp PROPERTIES OUTPUT_NAME qengine) +target_link_libraries(qengine_cpp PRIVATE qengine_core) + +# Place the module next to qengine/__init__.py so `import qengine` works from the repo root +set(_qengine_py_pkg "${CMAKE_SOURCE_DIR}/qengine") +set_target_properties(qengine_cpp PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${_qengine_py_pkg}" + LIBRARY_OUTPUT_DIRECTORY_RELEASE "${_qengine_py_pkg}" + LIBRARY_OUTPUT_DIRECTORY_DEBUG "${_qengine_py_pkg}" + RUNTIME_OUTPUT_DIRECTORY "${_qengine_py_pkg}" + RUNTIME_OUTPUT_DIRECTORY_RELEASE "${_qengine_py_pkg}" + RUNTIME_OUTPUT_DIRECTORY_DEBUG "${_qengine_py_pkg}") diff --git a/src/DBIngest.cpp b/cpp/DBIngest.cpp similarity index 58% rename from src/DBIngest.cpp rename to cpp/DBIngest.cpp index 08ec793..81782e6 100644 --- a/src/DBIngest.cpp +++ b/cpp/DBIngest.cpp @@ -1,25 +1,30 @@ -// -// Created by David Doebel on 13.03.2026. -// +/** + * @file DBIngest.cpp + * @brief Database connection and placeholder update routines. + */ #include "DBIngest.hpp" +#include #include - -// Queries -// Query for selecting the volatility surface parameters -std::string vol_surface_query = "" - - - - -// - - - +#include bool DBIngest::connect() { - connection_ = pqxx::connection("dbname=options_db user=quant_user port = 5432 host = localhost password = strong_password" ); + const char* db_name = std::getenv("DB_NAME"); + const char* db_user = std::getenv("DB_USER"); + const char* db_password = std::getenv("DB_PASSWORD"); + const char* db_host = std::getenv("DB_HOST"); + const char* db_port = std::getenv("DB_PORT"); + + std::ostringstream conn_str; + conn_str + << "dbname=" << (db_name ? db_name : "options_db") + << " user=" << (db_user ? db_user : "quant_user") + << " host=" << (db_host ? db_host : "localhost") + << " port=" << (db_port ? db_port : "5432") + << " password=" << (db_password ? db_password : ""); + + connection_ = pqxx::connection(conn_str.str()); if(connection_.is_open()) { std::cout << "Connected\n"; @@ -31,6 +36,7 @@ bool DBIngest::connect() { bool DBIngest::disconnect() { connection_.close(); + return true; } bool DBIngest::update(VolatilitySurface &surface) { @@ -48,8 +54,11 @@ bool DBIngest::update(VolatilitySurface &surface) { for (auto row : result) { std::cout << row[0] << " " << row[1] << " " << row[2] << " " << row[3] << std::endl; } - + (void)surface; + return false; } bool DBIngest::update(YieldCurve &yield_curve) { + (void)yield_curve; + return false; } diff --git a/src/DBIngest.hpp b/cpp/DBIngest.hpp similarity index 66% rename from src/DBIngest.hpp rename to cpp/DBIngest.hpp index 07b230e..2cb1405 100644 --- a/src/DBIngest.hpp +++ b/cpp/DBIngest.hpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 13.03.2026. -// +/** + * @file DBIngest.hpp + * @brief PostgreSQL helpers to load market objects (work in progress). + */ #ifndef QUANTENGINE_DBINGEST_HPP #define QUANTENGINE_DBINGEST_HPP @@ -10,6 +11,9 @@ #include "VolatilitySurface.hpp" #include "YieldCurve.hpp" +/** + * @brief Connects to Postgres via libpqxx and queries quotes for surface building. + */ class DBIngest { bool connect(); diff --git a/cpp/Exercise.cpp b/cpp/Exercise.cpp new file mode 100644 index 0000000..22a9595 --- /dev/null +++ b/cpp/Exercise.cpp @@ -0,0 +1,6 @@ +/** + * @file Exercise.cpp + * @brief @ref Exercise translation unit (interface only). + */ + +#include "Exercise.hpp" \ No newline at end of file diff --git a/src/Exercise.hpp b/cpp/Exercise.hpp similarity index 75% rename from src/Exercise.hpp rename to cpp/Exercise.hpp index 9fb780a..033c790 100644 --- a/src/Exercise.hpp +++ b/cpp/Exercise.hpp @@ -1,11 +1,15 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Exercise.hpp + * @brief Exercise style (European, American, Bermudan) and exercise times. + */ #ifndef QUANTENGINE_EXERCISE_HPP #define QUANTENGINE_EXERCISE_HPP #include +/** + * @brief Describes when the holder may exercise (metadata for pricing engines). + */ class Exercise { public: Exercise() = default; @@ -22,7 +26,9 @@ protected: }; +/** @brief Single exercise at maturity. */ class EuropeanExercise : public Exercise { +public: EuropeanExercise() : type_(Type::European) {}; EuropeanExercise(double maturity) : type_(Type::European){ exercise_times_.push_back(maturity); @@ -35,7 +41,9 @@ private: Type type_; }; +/** @brief Continuous American exercise from @f$t=0@f$ to maturity (placeholder grid). */ class AmericanExercise : public Exercise{ +public: AmericanExercise() : type_(Type::American) {}; AmericanExercise(double maturity) : type_(Type::American) { exercise_times_.push_back(0); diff --git a/cpp/FlatVolatilitySurface.cpp b/cpp/FlatVolatilitySurface.cpp new file mode 100644 index 0000000..a6bdcfb --- /dev/null +++ b/cpp/FlatVolatilitySurface.cpp @@ -0,0 +1,5 @@ +/** + * @file FlatVolatilitySurface.cpp + * @brief Ensures link visibility for @ref FlatVolatilitySurface. + */ +#include "FlatVolatilitySurface.hpp" diff --git a/cpp/FlatVolatilitySurface.hpp b/cpp/FlatVolatilitySurface.hpp new file mode 100644 index 0000000..8cea287 --- /dev/null +++ b/cpp/FlatVolatilitySurface.hpp @@ -0,0 +1,21 @@ +/** + * @file FlatVolatilitySurface.hpp + * @brief Constant implied volatility surface. + */ +#ifndef QUANTENGINE_FLATVOLATILITYSURFACE_HPP +#define QUANTENGINE_FLATVOLATILITYSURFACE_HPP +#include "VolatilitySurface.hpp" + +/** + * @brief @f$\sigma(K,T)\equiv\sigma_0@f$. + */ +class FlatVolatilitySurface : public VolatilitySurface { +public: + explicit FlatVolatilitySurface(double sigma = 0.2) : sigma_(sigma) {} + + double sigma(double K, double T) const override {return sigma_;} + +private: + double sigma_; +}; +#endif diff --git a/cpp/FlatYieldCurve.cpp b/cpp/FlatYieldCurve.cpp new file mode 100644 index 0000000..a204810 --- /dev/null +++ b/cpp/FlatYieldCurve.cpp @@ -0,0 +1,5 @@ +/** + * @file FlatYieldCurve.cpp + * @brief Ensures link visibility for @ref FlatYieldCurve (inline methods in header). + */ +#include "FlatYieldCurve.hpp" diff --git a/cpp/FlatYieldCurve.hpp b/cpp/FlatYieldCurve.hpp new file mode 100644 index 0000000..9427bb8 --- /dev/null +++ b/cpp/FlatYieldCurve.hpp @@ -0,0 +1,22 @@ +/** + * @file FlatYieldCurve.hpp + * @brief Constant zero rate yield curve. + */ +#ifndef QUANTENGINE_FLATYIELDCURVE_HPP +#define QUANTENGINE_FLATYIELDCURVE_HPP +#include "YieldCurve.hpp" +#include + +/** + * @brief @f$P(t)=e^{-r t}@f$, @f$f(t)\equiv r@f$. + */ +class FlatYieldCurve : public YieldCurve{ +public: + explicit FlatYieldCurve(double rate = 0.01) : rate_(rate) {} + + double discount(double t) const override {return std::exp(-rate_ * t); }; + double zeroRate(double t) const override {return rate_; } +private: + double rate_ = 0.01; +}; +#endif diff --git a/cpp/ImpliedVolatility/Pybind.cpp b/cpp/ImpliedVolatility/Pybind.cpp new file mode 100644 index 0000000..89d82e6 --- /dev/null +++ b/cpp/ImpliedVolatility/Pybind.cpp @@ -0,0 +1,93 @@ +/** + * @file Pybind.cpp + * @brief pybind11 module @c qengine exposing @ref BSWrapper::bs_price_wrapper overloads. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "BSWrapper.hpp" + +namespace py = pybind11; + +namespace { + +std::vector to_vector_double(const py::array_t &a) { + py::buffer_info info = a.request(); + if (info.ndim != 1) { + throw std::runtime_error("expected 1-D ndarray for S, K, T, r, sigma"); + } + const auto *p = static_cast(info.ptr); + const ssize_t n = info.shape[0]; + return std::vector(p, p + n); +} + +std::vector to_vector_bool_1d(const py::array_t &a) { + py::buffer_info info = a.request(); + if (info.ndim != 1) { + throw std::runtime_error("expected 1-D ndarray for is_call"); + } + if (info.itemsize != 1) { + throw std::runtime_error("is_call: expected a boolean ndarray (dtype=bool)"); + } + const ssize_t n = info.shape[0]; + const auto *p = static_cast(info.ptr); + std::vector out(static_cast(n)); + for (ssize_t i = 0; i < n; ++i) { + out[static_cast(i)] = (p[i] != 0); + } + return out; +} + +void check_same_length(size_t n, size_t k, const char *name) { + if (n != k) { + throw std::runtime_error(std::string("length mismatch for ") + name); + } +} + +} // namespace + +PYBIND11_MODULE(qengine, m) { + m.doc() = "Binding for the Black Scholes model"; + + m.def( + "bs_price", + [](double S, double K, double T, double r, double sigma, bool is_call) { + return BSWrapper::bs_price_wrapper(S, K, T, r, sigma, is_call); + }, + py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call")); + + m.def( + "bs_price", + [](py::array_t S, py::array_t K, py::array_t T, py::array_t r, + py::array_t sigma, py::array_t is_call) { + std::vector vS = to_vector_double(S); + std::vector vK = to_vector_double(K); + std::vector vT = to_vector_double(T); + std::vector vr = to_vector_double(r); + std::vector vsig = to_vector_double(sigma); + std::vector vC = to_vector_bool_1d(is_call); + const size_t n = vS.size(); + check_same_length(n, vK.size(), "K"); + check_same_length(n, vT.size(), "T"); + check_same_length(n, vr.size(), "r"); + check_same_length(n, vsig.size(), "sigma"); + check_same_length(n, vC.size(), "is_call"); + return BSWrapper::batch_bs_price_wrapper(vS, vK, vT, vr, vsig, vC); + }, + py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call")); + + m.def( + "bs_price", + [](const std::vector &S, const std::vector &K, const std::vector &T, + const std::vector &r, const std::vector &sigma, const std::vector &is_call) { + return BSWrapper::batch_bs_price_wrapper(S, K, T, r, sigma, is_call); + }, + py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call")); +} diff --git a/src/Instrument.cpp b/cpp/Instrument.cpp similarity index 80% rename from src/Instrument.cpp rename to cpp/Instrument.cpp index 35d8967..5106e34 100644 --- a/src/Instrument.cpp +++ b/cpp/Instrument.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Instrument.cpp + * @brief @ref Instrument implementation. + */ #include "Instrument.hpp" diff --git a/src/Instrument.hpp b/cpp/Instrument.hpp similarity index 62% rename from src/Instrument.hpp rename to cpp/Instrument.hpp index dfd6a56..8a1b5cf 100644 --- a/src/Instrument.hpp +++ b/cpp/Instrument.hpp @@ -1,15 +1,20 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Instrument.hpp + * @brief Generic derivative instrument: payoff plus pricing engine. + */ #ifndef QUANTENGINE_INSTRUMENT_HPP #define QUANTENGINE_INSTRUMENT_HPP +#include "Exercise.hpp" #include "Payoff.hpp" #include "PricingEngine.hpp" #include class PricingEngine; +/** + * @brief Represents a tradeable claim priced via a @ref PricingEngine. + */ class Instrument { public: Instrument() = default; @@ -24,6 +29,9 @@ public: return *payoff_; } + /** @brief Base @ref Instrument is treated as European unless overridden by @ref Option. */ + [[nodiscard]] virtual Exercise::Type exerciseType() const { return Exercise::Type::European; } + protected: double maturity_; std::unique_ptr payoff_; diff --git a/src/MarketData.cpp b/cpp/MarketData.cpp similarity index 78% rename from src/MarketData.cpp rename to cpp/MarketData.cpp index 16bf3c4..ea07d0e 100644 --- a/src/MarketData.cpp +++ b/cpp/MarketData.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file MarketData.cpp + * @brief @ref MarketData accessors. + */ #include "MarketData.hpp" diff --git a/src/MarketData.hpp b/cpp/MarketData.hpp similarity index 82% rename from src/MarketData.hpp rename to cpp/MarketData.hpp index 9b8d200..bff093b 100644 --- a/src/MarketData.hpp +++ b/cpp/MarketData.hpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file MarketData.hpp + * @brief Spot, discount curve, and volatility surface bundle. + */ #ifndef QUANTENGINE_MARKETDATA_HPP #define QUANTENGINE_MARKETDATA_HPP @@ -8,6 +9,9 @@ #include "VolatilitySurface.hpp" #include +/** + * @brief Immutable snapshot of inputs needed to simulate or price. + */ class MarketData { public: MarketData() = delete; diff --git a/src/MonteCarloEngine.cpp b/cpp/MonteCarloEngine.cpp similarity index 88% rename from src/MonteCarloEngine.cpp rename to cpp/MonteCarloEngine.cpp index 22b2611..d97c946 100644 --- a/src/MonteCarloEngine.cpp +++ b/cpp/MonteCarloEngine.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file MonteCarloEngine.cpp + * @brief Monte Carlo mean estimator with discounting. + */ #include "MonteCarloEngine.hpp" #include diff --git a/src/MonteCarloEngine.hpp b/cpp/MonteCarloEngine.hpp similarity index 73% rename from src/MonteCarloEngine.hpp rename to cpp/MonteCarloEngine.hpp index 91c1c3b..10dcb2e 100644 --- a/src/MonteCarloEngine.hpp +++ b/cpp/MonteCarloEngine.hpp @@ -1,13 +1,16 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file MonteCarloEngine.hpp + * @brief Monte Carlo pricing using a @ref StochasticProcess and @ref RandomGenerator. + */ #ifndef QUANTENGINE_MONTECARLOENGINE_HPP #define QUANTENGINE_MONTECARLOENGINE_HPP #include "PricingEngine.hpp" #include "RandomGenerator.hpp" - +/** + * @brief Simple path simulation: one Euler/exact step to horizon, average discounted payoff. + */ class MonteCarloEngine : public PricingEngine{ public: MonteCarloEngine() = default; diff --git a/cpp/NewtonSolver.cpp b/cpp/NewtonSolver.cpp new file mode 100644 index 0000000..325fb28 --- /dev/null +++ b/cpp/NewtonSolver.cpp @@ -0,0 +1,8 @@ +/** + * @file NewtonSolver.cpp + * @brief Placeholder translation unit for @ref NewtonSolver. + */ + +#include "NewtonSolver.hpp" + + diff --git a/src/NewtonSolver.hpp b/cpp/NewtonSolver.hpp similarity index 74% rename from src/NewtonSolver.hpp rename to cpp/NewtonSolver.hpp index 1e36680..0c4a9e2 100644 --- a/src/NewtonSolver.hpp +++ b/cpp/NewtonSolver.hpp @@ -1,12 +1,16 @@ -// -// Created by David Doebel on 13.03.2026. -// +/** + * @file NewtonSolver.hpp + * @brief Generic Newton iteration helper (incomplete / reserved for solvers). + */ #ifndef QUANTENGINE_GAUSSSOLVER_HPP #define QUANTENGINE_GAUSSSOLVER_HPP #include +/** + * @brief Template Newton step loop with relative/absolute tolerances. + */ class NewtonSolver { template bool solve(F&& func, DFinv&& dfinv,T x0 , double rtol, double atol) { diff --git a/src/Option.cpp b/cpp/Option.cpp similarity index 80% rename from src/Option.cpp rename to cpp/Option.cpp index fb2e80b..58643b3 100644 --- a/src/Option.cpp +++ b/cpp/Option.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Option.cpp + * @brief @ref Option implementation. + */ #include "Option.hpp" diff --git a/src/Option.hpp b/cpp/Option.hpp similarity index 63% rename from src/Option.hpp rename to cpp/Option.hpp index 49603e6..5e6c500 100644 --- a/src/Option.hpp +++ b/cpp/Option.hpp @@ -1,12 +1,16 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Option.hpp + * @brief Option instrument with exercise style (@ref Exercise). + */ #ifndef QUANTENGINE_OPTION_HPP #define QUANTENGINE_OPTION_HPP #include "Instrument.hpp" #include "Exercise.hpp" +/** + * @brief Extends @ref Instrument with exercise schedule / style metadata. + */ class Option : public Instrument{ public: Option() = default; @@ -17,10 +21,13 @@ public: return *exercise_; } + [[nodiscard]] Exercise::Type exerciseType() const override { return exercise_->type(); } + protected: std::unique_ptr exercise_; }; +/** @brief Plain-vanilla option using the base @ref Option constructor. */ class VanillaOption : public Option { public: using Option::Option; diff --git a/src/Payoff.cpp b/cpp/Payoff.cpp similarity index 80% rename from src/Payoff.cpp rename to cpp/Payoff.cpp index 5cb22e3..65a7111 100644 --- a/src/Payoff.cpp +++ b/cpp/Payoff.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Payoff.cpp + * @brief Payoff function implementations. + */ #include "Payoff.hpp" #include diff --git a/src/Payoff.hpp b/cpp/Payoff.hpp similarity index 56% rename from src/Payoff.hpp rename to cpp/Payoff.hpp index cf58cce..3b3bbc6 100644 --- a/src/Payoff.hpp +++ b/cpp/Payoff.hpp @@ -1,11 +1,19 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file Payoff.hpp + * @brief Payoff interface and standard European payoffs (call, put, digital). + */ #ifndef QUANTENGINE_PAYOFF_HPP #define QUANTENGINE_PAYOFF_HPP +/** + * @brief Standard payoff shapes for routing (e.g. analytic vs Monte Carlo). + */ +enum class PayoffKind { Call, Put, Digital }; +/** + * @brief Terminal payoff as a function of spot @f$S_T@f$. + */ class Payoff { public: @@ -14,35 +22,42 @@ public: virtual ~Payoff() = default; virtual double operator()(double S) = 0; virtual double strike() = 0; + [[nodiscard]] virtual PayoffKind kind() const = 0; }; +/** @brief Standard European call @f$\max(S-K,0)@f$. */ class CallPayoff : public Payoff { public: CallPayoff() = default; CallPayoff(double strike) : strike_(strike) {} double operator()(double S) override; double strike() override {return strike_;} + [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Call; } private: double strike_; }; +/** @brief Standard European put @f$\max(K-S,0)@f$. */ class PutPayoff : public Payoff { public: PutPayoff() = default; PutPayoff(double strike) : strike_(strike) {} double operator()(double S) override; double strike() override {return strike_;} + [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Put; } private: double strike_; }; +/** @brief Digital (cash-or-nothing style) payoff @f$1_{S>K}@f$. */ class DigitalPayoff : public Payoff { public: DigitalPayoff() = default; DigitalPayoff(double strike) : strike_(strike) {} double operator()(double S) override; double strike() override {return strike_;} + [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Digital; } private: double strike_; }; diff --git a/cpp/PricingEngine.cpp b/cpp/PricingEngine.cpp new file mode 100644 index 0000000..6a6699d --- /dev/null +++ b/cpp/PricingEngine.cpp @@ -0,0 +1,6 @@ +/** + * @file PricingEngine.cpp + * @brief @ref PricingEngine translation unit (interface only). + */ + +#include "PricingEngine.hpp" \ No newline at end of file diff --git a/src/PricingEngine.hpp b/cpp/PricingEngine.hpp similarity index 72% rename from src/PricingEngine.hpp rename to cpp/PricingEngine.hpp index 06bc254..e3d3c3b 100644 --- a/src/PricingEngine.hpp +++ b/cpp/PricingEngine.hpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file PricingEngine.hpp + * @brief Abstract pricer for @ref Instrument given a stochastic model. + */ #ifndef QUANTENGINE_PRICINGENGINE_HPP #define QUANTENGINE_PRICINGENGINE_HPP @@ -10,6 +11,9 @@ class Instrument; +/** + * @brief Computes model price of an instrument (e.g. Monte Carlo, PDE, closed form). + */ class PricingEngine { public: PricingEngine() = default; diff --git a/src/RandomGenerator.cpp b/cpp/RandomGenerator.cpp similarity index 77% rename from src/RandomGenerator.cpp rename to cpp/RandomGenerator.cpp index 55f0043..59f6c52 100644 --- a/src/RandomGenerator.cpp +++ b/cpp/RandomGenerator.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file RandomGenerator.cpp + * @brief @ref MersenneTwister implementation. + */ #include "RandomGenerator.hpp" diff --git a/src/RandomGenerator.hpp b/cpp/RandomGenerator.hpp similarity index 75% rename from src/RandomGenerator.hpp rename to cpp/RandomGenerator.hpp index e4df117..0f9aaed 100644 --- a/src/RandomGenerator.hpp +++ b/cpp/RandomGenerator.hpp @@ -1,11 +1,13 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file RandomGenerator.hpp + * @brief Random numbers for Monte Carlo (Gaussian draws). + */ #ifndef QUANTENGINE_RANDOMGENERATOR_HPP #define QUANTENGINE_RANDOMGENERATOR_HPP #include +/** @brief Interface for standard normal variates. */ class RandomGenerator { public: RandomGenerator() = default; @@ -14,6 +16,7 @@ public: virtual std::vector nextGaussianVector(std::size_t n) = 0; }; +/** @brief @c std::mt19937 with normal distribution. */ class MersenneTwister : public RandomGenerator { public: MersenneTwister() = default; diff --git a/src/Statistics.cpp b/cpp/Statistics.cpp similarity index 90% rename from src/Statistics.cpp rename to cpp/Statistics.cpp index 0578fe1..89b39cd 100644 --- a/src/Statistics.cpp +++ b/cpp/Statistics.cpp @@ -1,6 +1,7 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file Statistics.cpp + * @brief Streaming moment and extrema updates. + */ #include "Statistics.hpp" diff --git a/src/Statistics.hpp b/cpp/Statistics.hpp similarity index 74% rename from src/Statistics.hpp rename to cpp/Statistics.hpp index a4de629..e1aba3a 100644 --- a/src/Statistics.hpp +++ b/cpp/Statistics.hpp @@ -1,12 +1,15 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file Statistics.hpp + * @brief Online sample moments for Monte Carlo diagnostics. + */ #ifndef QUANTENGINE_STATISTICS_HPP #define QUANTENGINE_STATISTICS_HPP #include - +/** + * @brief Accumulates count, mean/variance-related sums, and running min/max. + */ class Statistics { public: Statistics() : moments_({0., 0., 0.}), n(0), max_(0.), min_(0.) {} diff --git a/cpp/StochasticProcess.cpp b/cpp/StochasticProcess.cpp new file mode 100644 index 0000000..0839005 --- /dev/null +++ b/cpp/StochasticProcess.cpp @@ -0,0 +1,6 @@ +/** + * @file StochasticProcess.cpp + * @brief @ref StochasticProcess translation unit (interface only). + */ + +#include "StochasticProcess.hpp" \ No newline at end of file diff --git a/src/StochasticProcess.hpp b/cpp/StochasticProcess.hpp similarity index 77% rename from src/StochasticProcess.hpp rename to cpp/StochasticProcess.hpp index a0ebb49..f9b8e80 100644 --- a/src/StochasticProcess.hpp +++ b/cpp/StochasticProcess.hpp @@ -1,12 +1,16 @@ -// -// Created by David Doebel on 05.03.2026. -// +/** + * @file StochasticProcess.hpp + * @brief Interface for SDE drift, diffusion, and time stepping. + */ #ifndef QUANTENGINE_STOCHASTICPROCESS_HPP #define QUANTENGINE_STOCHASTICPROCESS_HPP #include "MarketData.hpp" #include +/** + * @brief Stochastic model for the underlying, driven by @ref MarketData. + */ class StochasticProcess { public: StochasticProcess() = delete; diff --git a/cpp/VolatilitySurface.cpp b/cpp/VolatilitySurface.cpp new file mode 100644 index 0000000..f66c6b3 --- /dev/null +++ b/cpp/VolatilitySurface.cpp @@ -0,0 +1,6 @@ +/** + * @file VolatilitySurface.cpp + * @brief @ref VolatilitySurface translation unit (interface only). + */ + +#include "VolatilitySurface.hpp" \ No newline at end of file diff --git a/cpp/VolatilitySurface.hpp b/cpp/VolatilitySurface.hpp new file mode 100644 index 0000000..5c6f2be --- /dev/null +++ b/cpp/VolatilitySurface.hpp @@ -0,0 +1,28 @@ +/** + * @file VolatilitySurface.hpp + * @brief Implied volatility as a function of strike and expiry. + */ + +#ifndef QUANTENGINE_VOLATILITYSURFACE_HPP +#define QUANTENGINE_VOLATILITYSURFACE_HPP +#include + +/** + * @brief Local/vol surface @f$\sigma(K,T)@f$ used by simulation. + */ +class VolatilitySurface { +public: + virtual ~VolatilitySurface() = default; + virtual double sigma(double K, double T) const = 0; +private: + +}; + +class SVI : public VolatilitySurface { +public: + SVI() = default; + SVI(std::vector K, std::vector rho, std::vector S, std::vector T); +}; + + +#endif //QUANTENGINE_VOLATILITYSURFACE_HPP diff --git a/cpp/YieldCurve.cpp b/cpp/YieldCurve.cpp new file mode 100644 index 0000000..fe80603 --- /dev/null +++ b/cpp/YieldCurve.cpp @@ -0,0 +1,6 @@ +/** + * @file YieldCurve.cpp + * @brief @ref YieldCurve translation unit (interface only). + */ + +#include "YieldCurve.hpp" \ No newline at end of file diff --git a/src/YieldCurve.hpp b/cpp/YieldCurve.hpp similarity index 78% rename from src/YieldCurve.hpp rename to cpp/YieldCurve.hpp index 7853382..7221b96 100644 --- a/src/YieldCurve.hpp +++ b/cpp/YieldCurve.hpp @@ -1,11 +1,14 @@ -// -// Created by David Doebel on 06.03.2026. -// +/** + * @file YieldCurve.hpp + * @brief Abstract yield curve: discount factors and zero rates. + */ #ifndef QUANTENGINE_YIELDCURVE_HPP #define QUANTENGINE_YIELDCURVE_HPP - +/** + * @brief Risk-free rate term structure for discounting and risk-neutral drift. + */ class YieldCurve { public: YieldCurve() = default; diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 0000000..e75341e --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,50 @@ +# Doxygen configuration for QuantEngine (option_pricing). +# Run from repo root: doxygen docs/Doxyfile +# Or: cmake --build build --target docs + +PROJECT_NAME = QuantEngine +PROJECT_BRIEF = "Monte Carlo option pricing, market data abstractions, and Python bindings" + +OUTPUT_DIRECTORY = docs/html +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = YES + +JAVADOC_AUTOBRIEF = YES +QT_AUTOBRIEF = NO +OPTIMIZE_OUTPUT_FOR_CPLUSPLUS = YES + +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = + +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = NO +WARN_NO_PARAMDOC = NO + +INPUT = cpp +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.cpp *.hpp *.h +RECURSIVE = YES + +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = + +GENERATE_HTML = YES +HTML_OUTPUT = . +HTML_COLORSTYLE_HUE = 220 +GENERATE_LATEX = NO + +SEARCHENGINE = YES + +SOURCE_BROWSER = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES + +ALPHABETICAL_INDEX = YES +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO + +CLASS_DIAGRAMS = YES +HAVE_DOT = NO + +PREDEFINED = DOXYGEN diff --git a/docs/SECURITY.md b/docs/SECURITY.md new file mode 100644 index 0000000..ec71ad6 --- /dev/null +++ b/docs/SECURITY.md @@ -0,0 +1,27 @@ +# Security Checklist + +## Secrets handling + +- Never commit `.env` or any file containing credentials. +- Use `.env.example` for non-sensitive defaults only. +- Set DB credentials through environment variables. +- Rotate credentials if they have ever appeared in git history. + +## Database hardening + +- Use a dedicated runtime user with least required privileges. +- Keep administrative users separate from ingestion users. +- Restrict DB network access to trusted hosts/VPC/private network. +- Enable SSL/TLS for non-local database connections. + +## Publication readiness + +Before making the repository public: + +1. Confirm `git status` has no secret files staged. +2. Search for potential secret patterns: + - passwords + - API keys + - tokens +3. Verify `.gitignore` includes local secret files (`.env*`). +4. Regenerate credentials used during development. diff --git a/docs/SETUP.md b/docs/SETUP.md new file mode 100644 index 0000000..01f347a --- /dev/null +++ b/docs/SETUP.md @@ -0,0 +1,60 @@ +# Setup Guide + +This guide describes a clean local setup for development and reproducible runs. + +## Prerequisites + +- Python 3.10+ +- CMake 3.16+ +- A C++20 compiler +- PostgreSQL 14+ (or Docker) +- On macOS, Homebrew packages for C++ DB support: + - `libpq` + - `libpqxx` + - `eigen` + - `pybind11` + +## Python dependencies + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install --upgrade pip +pip install -e . +pip install pandas yfinance sqlalchemy psycopg2-binary matplotlib scipy +``` + +## Environment configuration + +```bash +cp .env.example .env +``` + +Edit `.env` and set: + +- `DB_HOST`, `DB_PORT`, `DB_NAME`, `DB_USER`, `DB_PASSWORD` +- `PIPELINE_SYMBOLS` +- admin credentials used only by setup script (`POSTGRES_ADMIN_*`) + +## Database bootstrap + +```bash +source .env +python scripts/setup_postgres.py +``` + +The script is idempotent and safe to rerun. + +## Build and test C++ + +```bash +cmake -S . -B build +cmake --build build -j +ctest --test-dir build --output-on-failure +``` + +## Generate Doxygen docs + +```bash +cmake --build build --target docs +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..cc96816 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["scikit-build-core>=0.5", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +name = "qengine" +version = "0.1.0" +description = "Quant engine with C++ backend" +authors = [{name = "David"}] +requires-python = ">=3.10" +dependencies = [ + "numpy", + "pandas", + "sqlalchemy", + "psycopg2-binary", + "yfinance", +] + +[tool.scikit-build] +# Keep separate from a local `cmake -B build` tree (different generators: Ninja vs Makefiles). +build-dir = "skbuild-build" +cmake.version = ">=3.16" +cmake.build-type = "Release" +cmake.define.BUILD_TESTING = "OFF" diff --git a/qengine/__init__.py b/qengine/__init__.py new file mode 100644 index 0000000..c4cb10e --- /dev/null +++ b/qengine/__init__.py @@ -0,0 +1,5 @@ +"""Qengine: quant pricing backend (native extension in qengine.qengine).""" + +from .qengine import bs_price + +__all__ = ["bs_price"] diff --git a/scripts/setup_postgres.py b/scripts/setup_postgres.py new file mode 100644 index 0000000..55fb84f --- /dev/null +++ b/scripts/setup_postgres.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +Idempotent PostgreSQL bootstrap script for the option_pricing project. + +What it does: +1) Creates the project role if it does not exist. +2) Creates the project database if it does not exist. +3) Grants ownership/privileges. +4) Applies src/data/sql/schema.sql to the project database. + +Configuration comes from environment variables (see .env.example). +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import psycopg2 +from psycopg2 import sql + + +ROOT = Path(__file__).resolve().parents[1] +SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql" + + +def _env(name: str, default: str | None = None) -> str: + value = os.getenv(name, default) + if value is None: + raise RuntimeError(f"Missing required environment variable: {name}") + return value + + +def admin_connect(dbname: str): + return psycopg2.connect( + dbname=dbname, + user=_env("POSTGRES_ADMIN_USER", "postgres"), + password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"), + host=_env("POSTGRES_ADMIN_HOST", "localhost"), + port=_env("POSTGRES_ADMIN_PORT", "5432"), + ) + + +def ensure_role_and_database() -> None: + db_user = _env("DB_USER", "quant_user") + db_password = _env("DB_PASSWORD", "") + db_name = _env("DB_NAME", "options_db") + + admin_db = _env("POSTGRES_ADMIN_DB", "postgres") + with admin_connect(admin_db) as conn: + conn.autocommit = True + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,)) + role_exists = cur.fetchone() is not None + if not role_exists: + cur.execute( + sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format( + sql.Identifier(db_user) + ), + (db_password,), + ) + else: + cur.execute( + sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format( + sql.Identifier(db_user) + ), + (db_password,), + ) + + cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,)) + db_exists = cur.fetchone() is not None + if not db_exists: + cur.execute( + sql.SQL("CREATE DATABASE {} OWNER {}").format( + sql.Identifier(db_name), + sql.Identifier(db_user), + ) + ) + else: + cur.execute( + sql.SQL("ALTER DATABASE {} OWNER TO {}").format( + sql.Identifier(db_name), + sql.Identifier(db_user), + ) + ) + + +def apply_schema() -> None: + if not SCHEMA_PATH.exists(): + raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}") + + schema_sql = SCHEMA_PATH.read_text(encoding="utf-8") + with admin_connect(_env("DB_NAME", "options_db")) as conn: + conn.autocommit = True + with conn.cursor() as cur: + cur.execute(schema_sql) + + +def main() -> None: + print("Ensuring role/database exist...") + ensure_role_and_database() + print("Applying schema...") + apply_schema() + print("Database setup complete.") + + +if __name__ == "__main__": + main() diff --git a/scripts/test_qengine_bindings.py b/scripts/test_qengine_bindings.py new file mode 100644 index 0000000..a523de8 --- /dev/null +++ b/scripts/test_qengine_bindings.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Smoke test: use an installed `qengine` package (pip install .) or a dev build (cmake -> qengine/*.so).""" + +from __future__ import annotations + +import math +import sys +from pathlib import Path + +# Running `python scripts/this.py` puts `scripts/` on sys.path, not the repo root +_REPO_ROOT = Path(__file__).resolve().parents[1] +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + + +def main() -> int: + try: + import qengine + except ImportError as e: + print( + f"Import failed ({e}). Install the package (pip install .) or build with CMake so " + "qengine/qengine.*.so exists next to qengine/__init__.py.", + file=sys.stderr, + ) + return 1 + + call = qengine.bs_price(100.0, 100.0, 1.0, 0.05, 0.2, True) + put = qengine.bs_price(100.0, 100.0, 1.0, 0.05, 0.2, False) + batch_list = qengine.bs_price( + [100.0, 100.0], + [100.0, 110.0], + [1.0, 1.0], + [0.05, 0.05], + [0.2, 0.2], + [True, False], + ) + + assert math.isfinite(call) and math.isfinite(put) + assert len(batch_list) == 2 and all(math.isfinite(x) for x in batch_list) + + print("qengine.bs_price (call):", call) + print("qengine.bs_price (put):", put) + print("qengine.bs_price (list batch):", list(batch_list)) + + try: + import numpy as np + except ImportError: + print("ok: overloads callable (NumPy not installed; skipped ndarray batch test).") + return 0 + + s = np.array([100.0, 100.0], dtype=np.float64) + k = np.array([100.0, 110.0], dtype=np.float64) + t = np.array([1.0, 1.0], dtype=np.float64) + r = np.array([0.05, 0.05], dtype=np.float64) + sig = np.array([0.2, 0.2], dtype=np.float64) + opt = np.array([True, False], dtype=bool) + batch_np = qengine.bs_price(s, k, t, r, sig, opt) + assert len(batch_np) == 2 and all(math.isfinite(float(x)) for x in batch_np) + print("qengine.bs_price (ndarray batch):", [float(x) for x in batch_np]) + print("ok: overloads callable.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index 030c61a..0000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ -add_library(qengine - Instrument.cpp - Instrument.hpp - Payoff.cpp - Payoff.hpp - Option.cpp - Option.hpp - PricingEngine.cpp - PricingEngine.hpp - MonteCarloEngine.cpp - MonteCarloEngine.hpp - StochasticProcess.cpp - StochasticProcess.hpp - Exercise.cpp - Exercise.hpp - MarketData.cpp - MarketData.hpp - YieldCurve.cpp - YieldCurve.hpp - VolatilitySurface.cpp - VolatilitySurface.hpp - RandomGenerator.cpp - RandomGenerator.hpp - Statistics.cpp - Statistics.hpp - BlackScholesProcess.cpp - BlackScholesProcess.hpp - DBIngest.cpp - DBIngest.hpp - GaussSolver.cpp - GaussSolver.hpp - - -) - -target_include_directories(qengine PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_include_directories(qengine PRIVATE - /opt/homebrew/include -) -target_link_libraries(qengine Eigen3::Eigen) -target_link_libraries(qengine pqxx pq) \ No newline at end of file diff --git a/src/Exercise.cpp b/src/Exercise.cpp deleted file mode 100644 index fdadc45..0000000 --- a/src/Exercise.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by David Doebel on 05.03.2026. -// - -#include "Exercise.hpp" \ No newline at end of file diff --git a/src/ImpliedVolatility/__init__.py b/src/ImpliedVolatility/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ImpliedVolatility/compute_vls.py b/src/ImpliedVolatility/compute_vls.py new file mode 100644 index 0000000..ef68ed2 --- /dev/null +++ b/src/ImpliedVolatility/compute_vls.py @@ -0,0 +1,49 @@ +import numpy as np +import qengine +from scipy.optimize import brentq + + +def implied_vol(price, S, K, T, r, call): + """ + Implied vol for each row. Arguments may be scalars or 1-D arrays-like (same length). + """ + price = np.asarray(price, dtype=np.float64) + S = np.asarray(S, dtype=np.float64) + K = np.asarray(K, dtype=np.float64) + T = np.asarray(T, dtype=np.float64) + call = np.asarray(call, dtype=bool) + r = float(r) + + scalar_in = price.ndim == 0 + if scalar_in: + price = np.atleast_1d(price) + S = np.atleast_1d(S) + K = np.atleast_1d(K) + T = np.atleast_1d(T) + call = np.atleast_1d(call) + + n = price.shape[0] + if (S.shape[0] != n or K.shape[0] != n or T.shape[0] != n or call.shape[0] != n): + raise ValueError( + f"implied_vol: length mismatch price={n}, S={S.shape[0]}, K={K.shape[0]}, " + f"T={T.shape[0]}, call={call.shape[0]}" + ) + + out = np.full(n, np.nan, dtype=np.float64) + for i in range(n): + p, s, k, t, c = float(price[i]), float(S[i]), float(K[i]), float(T[i]), bool(call[i]) + if not np.isfinite(p) or not np.isfinite(s) or not np.isfinite(k) or not np.isfinite(t): + continue + if s <= 0 or k <= 0 or t <= 0: + continue + try: + def f(sig: float) -> float: + return qengine.bs_price(s, k, t, r, sig, c) - p + + out[i] = brentq(f, 1e-6, 5.0) + except (ValueError, RuntimeError): + out[i] = np.nan + + if scalar_in: + return float(out[0]) + return out diff --git a/src/ImpliedVolatility/setup.py b/src/ImpliedVolatility/setup.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ImpliedVolatility/svi.py b/src/ImpliedVolatility/svi.py new file mode 100644 index 0000000..dab56f5 --- /dev/null +++ b/src/ImpliedVolatility/svi.py @@ -0,0 +1,1023 @@ +import numpy as np +import pandas as pd +from scipy.optimize import least_squares, minimize + +# --------------------------------------------------------------------------- +# Core SVI math +# --------------------------------------------------------------------------- + + +def svi_total_variance( + k: np.ndarray, + a: float, + b: float, + rho: float, + m: float, + sigma: float, +) -> np.ndarray: + """Total variance w(k) = a + b * (rho * (k - m) + sqrt((k - m)^2 + sigma^2)).""" + km = k - m + root = np.sqrt(km**2 + sigma**2) + return a + b * (rho * km + root) + + +def svi_jacobian_params( + k: np.ndarray, a: float, b: float, rho: float, m: float, sigma: float +) -> np.ndarray: + """Jacobian (n, 5): columns [da, db, drho, dm, dsigma] of w(k).""" + km = k - m + root = np.maximum(np.sqrt(km**2 + sigma**2), 1e-14) + return np.column_stack( + [ + np.ones_like(k), + rho * km + root, + b * km, + -b * (rho + km / root), + b * (sigma / root), + ] + ) + + +def log_forward_moneyness( + strike: np.ndarray, + spot: np.ndarray, + T: np.ndarray, + r: float, +) -> np.ndarray: + """k = log(K / F) with F = S * exp(r * T).""" + fwd = spot * np.exp(r * np.asarray(T, dtype=np.float64)) + return np.log(np.asarray(strike, dtype=np.float64) / fwd) + + +def total_variance_from_iv(iv: np.ndarray, T: np.ndarray) -> np.ndarray: + """w = sigma^2 * T.""" + iv = np.asarray(iv, dtype=np.float64) + T = np.asarray(T, dtype=np.float64) + return iv**2 * T + + +# --------------------------------------------------------------------------- +# Data preparation (load_data.merge + compute_iv output) +# --------------------------------------------------------------------------- + + +def prepare_svi_inputs( + df: pd.DataFrame, + *, + spot_col: str = "spot", + strike_col: str = "strike", + T_col: str = "T", + iv_col: str = "iv", + r: float = 0.05, + spread_col: Optional[str] = "spread", +) -> pd.DataFrame: + """ + Add columns ``log_moneyness`` (k = log K/F) and ``total_var`` (w = iv^2 T). + If ``spread_col`` is present, adds ``svi_weight`` ~ 1 / spread^2 for weighted fits. + Drops rows with non-finite inputs or non-positive T. + """ + out = df.copy() + S = out[spot_col].to_numpy(dtype=np.float64) + K = out[strike_col].to_numpy(dtype=np.float64) + T = out[T_col].to_numpy(dtype=np.float64) + iv = out[iv_col].to_numpy(dtype=np.float64) + + valid = ( + np.isfinite(S) + & np.isfinite(K) + & np.isfinite(T) + & np.isfinite(iv) + & (S > 0) + & (K > 0) + & (T > 0) + & (iv > 0) + ) + out = out.loc[valid].copy() + S = out[spot_col].to_numpy(dtype=np.float64) + K = out[strike_col].to_numpy(dtype=np.float64) + T = out[T_col].to_numpy(dtype=np.float64) + iv = out[iv_col].to_numpy(dtype=np.float64) + + out["log_moneyness"] = log_forward_moneyness(K, S, T, r) + out["total_var"] = total_variance_from_iv(iv, T) + if spread_col and spread_col in out.columns: + sp = out[spread_col].to_numpy(dtype=np.float64) + out["svi_weight"] = 1.0 / np.maximum(sp**2, 1e-12) + return out + + +# --------------------------------------------------------------------------- +# Loss helpers (optional smooth Huber-style path via L-BFGS-B) +# --------------------------------------------------------------------------- + + +def huber_loss(residual: np.ndarray, delta: float = 0.01) -> np.ndarray: + ar = np.abs(residual) + return np.where(ar < delta, 0.5 * residual**2, delta * (ar - 0.5 * delta)) + + +def svi_huber_objective( + x: np.ndarray, + k: np.ndarray, + w_obs: np.ndarray, + sqrt_wts: np.ndarray, + lam: float, +) -> float: + a, b, rho, m, sigma = x + if b <= 0 or sigma <= 0 or abs(rho) >= 1.0: + return 1e12 + w_fit = svi_total_variance(k, a, b, rho, m, sigma) + resid = w_fit - w_obs + loss = np.mean(huber_loss(sqrt_wts * resid / (np.mean(sqrt_wts) + 1e-12))) + reg = lam * (a**2 + b**2 + m**2 + sigma**2) + return float(loss + reg) + + +# --------------------------------------------------------------------------- +# Fitting +# --------------------------------------------------------------------------- + + +@dataclass +class SVIParams: + a: float + b: float + rho: float + m: float + sigma: float + + def total_var(self, k: np.ndarray) -> np.ndarray: + return svi_total_variance(k, self.a, self.b, self.rho, self.m, self.sigma) + + +@dataclass +class SVISliceFit: + """Single-expiry calibration result.""" + + params: SVIParams + success: bool + cost: float + message: str + n_points: int + T_mean: float + group_key: str + + +@dataclass +class SVISurfaceFit: + slices: list[SVISliceFit] + meta: Mapping[str, object] + + +def _butterfly_constraints_ok(a: float, b: float, rho: float, m: float, sigma: float) -> bool: + """ + Gatheral-style no-butterfly constraints for raw SVI: + + - b > 0, sigma > 0, |rho| < 1 + - a + b*sigma*sqrt(1-rho^2) >= 0 (minimum variance >= 0) + - b*(1+|rho|) < 2 (wing slopes controlled) + """ + if not (b > 0.0 and sigma > 0.0 and abs(rho) < 1.0): + return False + if a + b * sigma * np.sqrt(max(1.0 - rho * rho, 0.0)) < 0.0: + return False + if b * (1.0 + abs(rho)) >= 2.0: + return False + return True + + +def _butterfly_violation_terms(a: float, b: float, rho: float, m: float, sigma: float) -> np.ndarray: + """ + Soft butterfly / wing violations as non-negative terms v_i such that + sum(v_i**2) is the arbitrage penalty used in the loss. + """ + # minimum variance >= 0 + min_var = a + b * sigma * np.sqrt(max(1.0 - rho * rho, 0.0)) + v_min = max(0.0, -min_var) + # wing slope constraint b(1+|rho|) < 2 + wing = b * (1.0 + abs(rho)) - 2.0 + v_wing = max(0.0, wing) + return np.array([v_min, v_wing], dtype=np.float64) + + +def _initial_guess(k: np.ndarray, w: np.ndarray) -> np.ndarray: + m0 = float(np.average(k, weights=np.clip(w, 1e-6, None))) + a0 = float(np.clip(np.percentile(w, 35), 1e-6, None)) + b0 = 0.25 + rho0 = -0.4 + sigma0 = 0.15 + return np.array([a0, b0, rho0, m0, sigma0], dtype=np.float64) + + +def _bounds(k: np.ndarray, w: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + w_max = float(np.max(w) * 1.5 + 0.1) + km = float(np.max(np.abs(k)) + 0.25) + lo = np.array([0.0, 1e-5, -0.999, -km, 1e-4], dtype=np.float64) + hi = np.array([w_max, 10.0, 0.999, km, 5.0], dtype=np.float64) + return lo, hi + + +def fit_svi_slice( + k: np.ndarray, + w_obs: np.ndarray, + *, + weights: Optional[np.ndarray] = None, + sqrt_weights: Optional[np.ndarray] = None, + huber_delta: float = 0.02, + reg_lambda: float = 1e-6, + method: str = "least_squares", + verbose: int = 0, +) -> tuple[SVIParams, object]: + """ + Calibrate one SVI slice. + + Parameters + ---------- + method + ``least_squares`` — recommended (soft_l1 + analytic Jacobian). + ``lbfgs`` — smooth Huber objective with L-BFGS-B (bounds). + """ + k = np.asarray(k, dtype=np.float64).ravel() + w_obs = np.asarray(w_obs, dtype=np.float64).ravel() + if k.shape != w_obs.shape: + raise ValueError("k and w_obs must have the same shape") + + if sqrt_weights is not None: + sw = np.asarray(sqrt_weights, dtype=np.float64).ravel() + elif weights is not None: + wts = np.asarray(weights, dtype=np.float64).ravel() + sw = np.sqrt(np.maximum(wts, 1e-12)) + else: + sw = np.ones_like(w_obs) + + mask = np.isfinite(k) & np.isfinite(w_obs) & (w_obs > 0) & np.isfinite(sw) + k, w_obs, sw = k[mask], w_obs[mask], sw[mask] + if k.size < 5: + raise ValueError("Need at least 5 valid points to fit SVI") + + x0 = _initial_guess(k, w_obs) + lo, hi = _bounds(k, w_obs) + + if method == "least_squares": + + def residuals(x: np.ndarray) -> np.ndarray: + a, b, rho, m, sig = x + wf = svi_total_variance(k, a, b, rho, m, sig) + if not np.all(np.isfinite(wf)) or np.any(wf <= 0.0): + return np.full_like(w_obs, 1e3) + return sw * (wf - w_obs) + + def jac(x: np.ndarray) -> np.ndarray: + a, b, rho, m, sig = x + jw = svi_jacobian_params(k, a, b, rho, m, sig) + return sw[:, None] * jw + + sol = least_squares( + lambda x: np.concatenate( + [ + residuals(x), + ( + np.sqrt(reg_lambda) * _butterfly_violation_terms(*x) + if reg_lambda > 0.0 + else np.zeros(2, dtype=np.float64) + ), + ] + ), + x0, + bounds=(lo, hi), + loss="soft_l1", + f_scale=huber_delta, + ftol=1e-10, + xtol=1e-10, + gtol=1e-10, + verbose=verbose, + max_nfev=2000, + ) + x = sol.x + params = SVIParams(float(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4])) + return params, sol + + if method == "lbfgs": + + def obj_lbfgs(x: np.ndarray) -> float: + a, b, rho, m, sig = x + base = svi_huber_objective(x, k, w_obs, sw, reg_lambda) + # soft butterfly / wing penalty (same reg_lambda weight) + if reg_lambda > 0.0: + v = _butterfly_violation_terms(a, b, rho, m, sig) + base += reg_lambda * float(np.sum(v * v)) + return base + + sol = minimize( + obj_lbfgs, + x0, + method="L-BFGS-B", + bounds=list(zip(lo, hi)), + options={"ftol": 1e-12, "maxiter": 1500}, + ) + x = sol.x + params = SVIParams(float(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4])) + return params, sol + + raise ValueError(f"Unknown method: {method}") + + +def fit_svi_surface( + df: pd.DataFrame, + *, + group_col: str = "expiration_date", + T_col: str = "T", + weight_col: Optional[str] = "svi_weight", + min_points: int = 5, + fit_kwargs: Optional[dict] = None, +) -> SVISurfaceFit: + """ + Fit one SVI slice per expiry (or other grouping column). + + Expects columns from :func:`prepare_svi_inputs`: ``log_moneyness``, ``total_var``. + """ + fit_kwargs = fit_kwargs or {} + need = {"log_moneyness", "total_var"} + missing = need - set(df.columns) + if missing: + raise KeyError(f"DataFrame missing columns {missing}; run prepare_svi_inputs first") + + slices: list[SVISliceFit] = [] + + # sort groups by average maturity (for convenience only; no calendar coupling here) + grouped: list[tuple[object, pd.DataFrame, float]] = [] + for key, g in df.groupby(group_col, sort=True): + g = g.sort_values("log_moneyness") + T_mean = float(g[T_col].mean()) if T_col in g.columns else float("nan") + grouped.append((key, g, T_mean)) + + grouped.sort(key=lambda tup: tup[2]) + + for key, g, T_mean in grouped: + k = g["log_moneyness"].to_numpy(dtype=np.float64) + w = g["total_var"].to_numpy(dtype=np.float64) + if len(g) < min_points: + continue + try: + slice_kwargs = dict(fit_kwargs) if fit_kwargs else {} + if weight_col and weight_col in g.columns: + slice_kwargs = {**slice_kwargs, "weights": g[weight_col].to_numpy(dtype=np.float64)} + params, raw = fit_svi_slice(k, w, **slice_kwargs) + if hasattr(raw, "success"): + success = bool(raw.success) # type: ignore[attr-defined] + msg = str(getattr(raw, "message", "")) + cost = float(getattr(raw, "cost", np.nan)) # type: ignore[arg-type] + else: + success = True + msg = "" + cost = float("nan") + except ValueError as e: + slices.append( + SVISliceFit( + params=SVIParams(0, 0, 0, 0, 1), + success=False, + cost=float("nan"), + message=str(e), + n_points=len(g), + T_mean=T_mean, + group_key=str(key), + ) + ) + continue + + slices.append( + SVISliceFit( + params=params, + success=success, + cost=cost, + message=msg, + n_points=len(g), + T_mean=T_mean, + group_key=str(key), + ) + ) + + meta = {"group_col": group_col, "n_slices_attempted": df[group_col].nunique()} + return SVISurfaceFit(slices=slices, meta=meta) + + +# --------------------------------------------------------------------------- +# Output tables +# --------------------------------------------------------------------------- + + +def surface_params_dataframe(fit: SVISurfaceFit) -> pd.DataFrame: + """Wide table of calibrated parameters per slice.""" + rows = [] + for s in fit.slices: + p = s.params + rows.append( + { + "group_key": s.group_key, + "T_mean": s.T_mean, + "n_points": s.n_points, + "success": s.success, + "cost": s.cost, + "a": p.a, + "b": p.b, + "rho": p.rho, + "m": p.m, + "sigma": p.sigma, + "message": s.message, + } + ) + return pd.DataFrame(rows) + + +# --------------------------------------------------------------------------- +# Parameter smoothing across maturity (for calendar consistency diagnostics) +# --------------------------------------------------------------------------- + + +@dataclass +class SmoothedSVICurves: + T_knots: np.ndarray + a_spline: "UnivariateSpline" + logb_spline: "UnivariateSpline" + u_spline: "UnivariateSpline" # atanh(rho) + m_spline: "UnivariateSpline" + logsig_spline: "UnivariateSpline" + + def params_at(self, T: np.ndarray | float) -> SVIParams | list[SVIParams]: + T_arr = np.asarray(T, dtype=np.float64).ravel() + a = self.a_spline(T_arr) + b = np.exp(self.logb_spline(T_arr)) + rho = np.tanh(self.u_spline(T_arr)) + m = self.m_spline(T_arr) + sigma = np.exp(self.logsig_spline(T_arr)) + if T_arr.size == 1: + # a, b, rho, m, sigma are 1D arrays here; index the single element + return SVIParams( + float(a[0]), + float(b[0]), + float(rho[0]), + float(m[0]), + float(sigma[0]), + ) + return [SVIParams(float(ai), float(bi), float(ri), float(mi), float(si)) for ai, bi, ri, mi, si in zip(a, b, rho, m, sigma)] + + def total_var(self, k: np.ndarray, T: np.ndarray | float) -> np.ndarray: + T_arr = np.asarray(T, dtype=np.float64) + if T_arr.ndim == 0: + p = self.params_at(float(T_arr)) + return p.total_var(np.asarray(k, dtype=np.float64)) + # broadcast over grid (T_i, k_j) + k_arr = np.asarray(k, dtype=np.float64).ravel() + out = np.empty((T_arr.size, k_arr.size), dtype=np.float64) + for i, Ti in enumerate(T_arr.ravel()): + p = self.params_at(float(Ti)) + out[i, :] = p.total_var(k_arr) + return out + + +def smooth_svi_parameters( + params_df: pd.DataFrame, + *, + T_col: str = "T_mean", + smooth_factor_a: float = 1e-4, + smooth_factor_m: float = 1e-4, + smooth_factor_others: float = 0.0, + min_T: float = 0.0, + weight_col: str = "n_points", +) -> SmoothedSVICurves: + """ + Phase 4–5 from the note: + + - take per-slice parameter table (T, a, b, rho, m, sigma) + - apply transformations (log b, atanh rho, log sigma) + - spline-smooth each vs T. + """ + from scipy.interpolate import UnivariateSpline + + df = params_df.copy() + df = df[df["success"]].copy() + df = df[np.isfinite(df[T_col])] + df = df[df[T_col] > min_T] + if df.empty: + raise ValueError("smooth_svi_parameters: no valid slices after filtering.") + + df = df.sort_values(T_col).drop_duplicates(T_col) + T = df[T_col].to_numpy(dtype=np.float64) + a = df["a"].to_numpy(dtype=np.float64) + b = df["b"].to_numpy(dtype=np.float64) + rho = df["rho"].to_numpy(dtype=np.float64) + m = df["m"].to_numpy(dtype=np.float64) + sigma = df["sigma"].to_numpy(dtype=np.float64) + + # transformed parameters + # normalize b and sigma by sqrt(T) to stabilize term-structure behaviour + sqrtT = np.sqrt(np.maximum(T, 1e-8)) + logb = np.log(np.maximum(b / sqrtT, 1e-8)) + u = np.arctanh(np.clip(rho, -0.999, 0.999)) + logsig = np.log(np.maximum(sigma / sqrtT, 1e-6)) + + w = df[weight_col].to_numpy(dtype=np.float64) if weight_col in df.columns else None + + # first-pass light smoothing per parameter + a_spl_1 = UnivariateSpline(T, a, w=w, s=smooth_factor_a) + logb_spl_1 = UnivariateSpline(T, logb, w=w, s=smooth_factor_others) + u_spl_1 = UnivariateSpline(T, u, w=w, s=smooth_factor_others) + m_spl_1 = UnivariateSpline(T, m, w=w, s=smooth_factor_m) + logsig_spl_1 = UnivariateSpline(T, logsig, w=w, s=smooth_factor_others) + + a_s = a_spl_1(T) + logb_s = logb_spl_1(T) + u_s = u_spl_1(T) + m_s = m_spl_1(T) + logsig_s = logsig_spl_1(T) + + # enforce monotone ATM total variance by adjusting a(T) only + atm_w_raw = [] + for i, (ai, lbi, ui, mi, lsi) in enumerate(zip(a_s, logb_s, u_s, m_s, logsig_s)): + Ti = float(T[i]) + scale = np.sqrt(max(1e-8, Ti)) + pi = SVIParams( + float(ai), + float(np.exp(lbi) * scale), + float(np.tanh(ui)), + float(mi), + float(np.exp(lsi) * scale), + ) + atm_w_raw.append(pi.total_var(np.array([0.0], dtype=np.float64))[0]) + atm_w_raw = np.asarray(atm_w_raw, dtype=np.float64) + atm_w_mono = np.maximum.accumulate(atm_w_raw) + delta_a = atm_w_mono - atm_w_raw + a_corr = a_s + delta_a + + # final splines built from corrected / smoothed arrays (with very small or zero extra smoothing) + a_spl = UnivariateSpline(T, a_corr, w=w, s=0.0) + logb_spl = UnivariateSpline(T, logb_s, w=w, s=0.0) + u_spl = UnivariateSpline(T, u_s, w=w, s=0.0) + m_spl = UnivariateSpline(T, m_s, w=w, s=0.0) + logsig_spl = UnivariateSpline(T, logsig_s, w=w, s=0.0) + + return SmoothedSVICurves( + T_knots=T, + a_spline=a_spl, + logb_spline=logb_spl, + u_spline=u_spl, + m_spline=m_spl, + logsig_spline=logsig_spl, + ) + + +def calendar_violation_matrix( + curves: SmoothedSVICurves, + T_grid: np.ndarray, + k_grid: np.ndarray, +) -> np.ndarray: + """ + Phase 6–7 diagnostic: + + On a (T, k) grid, compute w(T_{j+1}, k) - w(T_j, k). + Negative entries indicate calendar violations. + """ + T_grid = np.asarray(T_grid, dtype=np.float64).ravel() + k_grid = np.asarray(k_grid, dtype=np.float64).ravel() + if T_grid.size < 2: + raise ValueError("Need at least two maturities in T_grid for calendar diagnostics.") + w = curves.total_var(k_grid, T_grid) # shape (nT, nK) + diff = w[1:, :] - w[:-1, :] + return diff + + +def evaluate_surface_on_grid( + fit: SVISurfaceFit, + k_grid: np.ndarray, + *, + valid_only: bool = True, +) -> pd.DataFrame: + """ + Evaluate each successful slice on ``k_grid``. Returns long DataFrame: + ``group_key``, ``T_mean``, ``log_moneyness``, ``total_var_model``. + """ + k_grid = np.asarray(k_grid, dtype=np.float64).ravel() + parts = [] + for s in fit.slices: + if valid_only and not s.success: + continue + w = s.params.total_var(k_grid) + parts.append( + pd.DataFrame( + { + "group_key": s.group_key, + "T_mean": s.T_mean, + "log_moneyness": k_grid, + "total_var_model": w, + "iv_model": np.sqrt(np.maximum(w, 0) / np.maximum(s.T_mean, 1e-12)), + } + ) + ) + if not parts: + return pd.DataFrame( + columns=[ + "group_key", + "T_mean", + "log_moneyness", + "total_var_model", + "iv_model", + ] + ) + return pd.concat(parts, ignore_index=True) + + +# --------------------------------------------------------------------------- +# Plotting +# --------------------------------------------------------------------------- + + +def plot_svi_surface_fit( + df_prep: pd.DataFrame, + fit: SVISurfaceFit, + *, + group_col: str = "expiration_date", + n_grid: int = 120, + iv_space: bool = True, + figsize: tuple[float, float] = (10, 6), + save_path: Optional[str] = None, + show: bool = False, +) -> tuple[object, object]: + """ + Plot market total variance (or IV) vs k with SVI overlays per expiry. + + Parameters + ---------- + df_prep + Output of :func:`prepare_svi_inputs` (must include ``log_moneyness``, ``total_var``, ``T``). + """ + import matplotlib.pyplot as plt + + try: + cmap = plt.colormaps["viridis"] + except (AttributeError, KeyError): + from matplotlib import cm as _cm + + cmap = _cm.get_cmap("viridis") + + fig, ax = plt.subplots(figsize=figsize) + k_all = df_prep["log_moneyness"].to_numpy() + k_min, k_max = float(np.min(k_all)), float(np.max(k_all)) + pad = 0.05 * (k_max - k_min + 1e-6) + k_grid = np.linspace(k_min - pad, k_max + pad, n_grid) + + ok = [s for s in fit.slices if s.success] + if not ok: + ax.set_title("SVI surface fit (no successful slices)") + return fig, ax + + n_ok = max(len(ok), 1) + for i, s in enumerate(sorted(ok, key=lambda x: x.T_mean)): + color = cmap(i / max(n_ok - 1, 1)) if n_ok > 1 else cmap(0.5) + Tm = s.T_mean + if group_col in df_prep.columns: + sub = df_prep[df_prep[group_col].astype(str) == str(s.group_key)] + else: + sub = ( + df_prep[np.isclose(df_prep["T"], Tm, rtol=0.02, atol=1e-4)] + if "T" in df_prep.columns + else df_prep + ) + + k_m = sub["log_moneyness"].to_numpy() + w_m = sub["total_var"].to_numpy() + if iv_space: + iv_m = np.sqrt(np.maximum(w_m, 0) / np.maximum(Tm, 1e-12)) + ax.scatter(k_m, iv_m, s=18, alpha=0.6, color=color, marker="o", label=None) + wg = s.params.total_var(k_grid) + iv_g = np.sqrt(np.maximum(wg, 0) / np.maximum(Tm, 1e-12)) + ax.plot(k_grid, iv_g, color=color, lw=2, label=f"T≈{Tm:.3f} ({s.group_key})") + ax.set_ylabel("implied vol") + else: + ax.scatter(k_m, w_m, s=18, alpha=0.6, color=color, marker="o", label=None) + ax.plot(k_grid, s.params.total_var(k_grid), color=color, lw=2, label=f"T≈{Tm:.3f}") + ax.set_ylabel("total variance w") + + ax.set_xlabel("log moneyness log(K/F)") + ax.legend(loc="best", fontsize=8) + ax.set_title("SVI slices vs market") + ax.grid(True, alpha=0.3) + fig.tight_layout() + if save_path: + fig.savefig(save_path, bbox_inches="tight") + if show: + plt.show() + return fig, ax + + +def plot_residual_heatmap( + df_prep: pd.DataFrame, + fit: SVISurfaceFit, + *, + figsize: tuple[float, float] = (9, 4), + save_path: Optional[str] = None, + show: bool = False, +) -> tuple[object, object]: + """Simple heatmap of (w_model - w_market) / w_market by slice and moneyness bin.""" + import matplotlib.pyplot as plt + + rows = [] + for _, row in df_prep.iterrows(): + k = float(row["log_moneyness"]) + w = float(row["total_var"]) + Tm = float(row["T"]) if "T" in row.index else float("nan") + gkey = str(row.get("expiration_date", "")) + match = None + for s in fit.slices: + if s.success and (str(s.group_key) == gkey or np.isclose(s.T_mean, Tm, rtol=0.05, atol=1e-3)): + match = s + break + if match is None: + continue + w_hat = float(match.params.total_var(np.array([k]))[0]) + rel = (w_hat - w) / max(w, 1e-12) + rows.append({"T_mean": Tm, "k": k, "rel_err": rel, "group_key": gkey}) + + if not rows: + fig, ax = plt.subplots(figsize=figsize) + ax.set_title("No overlap for residual map") + return fig, ax + + rdf = pd.DataFrame(rows) + fig, ax = plt.subplots(figsize=figsize) + sc = ax.scatter(rdf["k"], rdf["T_mean"], c=rdf["rel_err"], cmap="coolwarm", s=35, vmin=-0.2, vmax=0.2) + fig.colorbar(sc, ax=ax, label="relative w error (model - mkt) / mkt") + ax.set_xlabel("log(K/F)") + ax.set_ylabel("T (years)") + ax.set_title("SVI relative variance residuals") + fig.tight_layout() + if save_path: + fig.savefig(save_path, bbox_inches="tight") + if show: + plt.show() + return fig, ax + + +# --------------------------------------------------------------------------- +# Finplot plotting (interactive) +# --------------------------------------------------------------------------- + + +def _rgba_to_hex(rgba: tuple[float, float, float, float]) -> str: + r, g, b, _a = rgba + return "#{:02x}{:02x}{:02x}".format(int(r * 255), int(g * 255), int(b * 255)) + + +def _maybe_import_finplot(): + try: + import finplot as fplt # type: ignore + + return fplt + except Exception: + return None + + +def plot_svi_surface_fit_finplot( + df_prep: pd.DataFrame, + fit: SVISurfaceFit, + *, + group_col: str = "expiration_date", + n_grid: int = 120, + iv_space: bool = True, + show: bool = True, + title: str = "SVI slices (finplot)", +): + """ + Interactive finplot rendering of per-expiry SVI curves. + + Note: finplot is primarily interactive; saving to PDF/PNG is not implemented here. + """ + fplt = _maybe_import_finplot() + if fplt is None: + raise ImportError("finplot is not installed. Use plot_backend='matplotlib' or install finplot.") + + import matplotlib.pyplot as plt + + k_all = df_prep["log_moneyness"].to_numpy(dtype=np.float64) + k_min, k_max = float(np.min(k_all)), float(np.max(k_all)) + pad = 0.05 * (k_max - k_min + 1e-6) + k_grid = np.linspace(k_min - pad, k_max + pad, n_grid) + + ok = [s for s in fit.slices if s.success] + if not ok: + ax = fplt.create_plot(title=title) + if hasattr(ax, "setTitle"): + ax.setTitle(title) + if show: + fplt.show() + return ax, None + + try: + cmap = plt.colormaps["viridis"] + except (AttributeError, KeyError): + cmap = plt.cm.get_cmap("viridis") + + # finplot uses an x/y scatter/plot model; feed numeric x arrays directly. + ax = fplt.create_plot(title=title, rows=1) + + n_ok = max(len(ok), 1) + for i, s in enumerate(sorted(ok, key=lambda x: x.T_mean)): + # pick a stable color per slice index + c = cmap(i / max(n_ok - 1, 1)) if n_ok > 1 else cmap(0.5) + color_hex = _rgba_to_hex(c) + Tm = s.T_mean + + if group_col in df_prep.columns: + sub = df_prep[df_prep[group_col].astype(str) == str(s.group_key)] + else: + # fallback to matching close maturities + sub = df_prep[ + np.isclose(df_prep["T"].to_numpy(dtype=np.float64), Tm, rtol=0.02, atol=1e-4) + ] + + if sub.empty: + continue + + k_m = sub["log_moneyness"].to_numpy(dtype=np.float64) + if iv_space: + y_m = np.sqrt(np.maximum(sub["total_var"].to_numpy(dtype=np.float64), 0) / max(Tm, 1e-12)) + y_g = np.sqrt( + np.maximum(s.params.total_var(k_grid), 0) / max(Tm, 1e-12) + ) + fplt.plot(k_m, y_m, ax=ax, style="o", color=color_hex, width=3) + try: + fplt.plot( + k_grid, + y_g, + ax=ax, + color=color_hex, + width=2, + legend=f"T≈{Tm:.3f}", + ) + except TypeError: + fplt.plot(k_grid, y_g, ax=ax, color=color_hex, width=2) + if hasattr(ax, "setLabel"): + ax.setLabel("left", "implied vol") + else: + y_m = sub["total_var"].to_numpy(dtype=np.float64) + y_g = s.params.total_var(k_grid) + fplt.plot(k_m, y_m, ax=ax, style="o", color=color_hex, width=3) + try: + fplt.plot( + k_grid, + y_g, + ax=ax, + color=color_hex, + width=2, + legend=f"T≈{Tm:.3f}", + ) + except TypeError: + fplt.plot(k_grid, y_g, ax=ax, color=color_hex, width=2) + if hasattr(ax, "setLabel"): + ax.setLabel("left", "total variance w") + + if hasattr(ax, "setLabel"): + ax.setLabel("bottom", "log moneyness log(K/F)") + if show: + fplt.show() + return ax, None + + +def plot_residual_heatmap_finplot( + df_prep: pd.DataFrame, + fit: SVISurfaceFit, + *, + show: bool = True, + title: str = "SVI residuals (finplot)", + vmin: float = -0.2, + vmax: float = 0.2, + n_bins: int = 11, + max_points: int = 8000, +): + """ + Interactive finplot residual visualization. + + Creates a color-binned scatter of relative w error (model - mkt) / mkt. + """ + fplt = _maybe_import_finplot() + if fplt is None: + raise ImportError("finplot is not installed. Use plot_backend='matplotlib' or install finplot.") + + import matplotlib.pyplot as plt + + rows = [] + for _, row in df_prep.iterrows(): + k = float(row["log_moneyness"]) + w = float(row["total_var"]) + if not np.isfinite(k) or not np.isfinite(w) or w <= 0: + continue + Tm = float(row["T"]) if "T" in row.index else float("nan") + gkey = str(row.get("expiration_date", "")) + match = None + for s in fit.slices: + if s.success and (str(s.group_key) == gkey or np.isclose(s.T_mean, Tm, rtol=0.05, atol=1e-3)): + match = s + break + if match is None: + continue + w_hat = float(match.params.total_var(np.array([k], dtype=np.float64))[0]) + rel = (w_hat - w) / max(w, 1e-12) + rows.append((k, Tm, rel)) + + if not rows: + ax = fplt.create_plot(title=title) + if show: + fplt.show() + return ax, None + + # optional downsampling for interactivity + if len(rows) > max_points: + rows = rows[:: int(np.ceil(len(rows) / max_points))] + + xs = np.array([r[0] for r in rows], dtype=np.float64) + ys = np.array([r[1] for r in rows], dtype=np.float64) + zs = np.array([r[2] for r in rows], dtype=np.float64) + + try: + cmap = plt.colormaps["coolwarm"] + except (AttributeError, KeyError): + cmap = plt.cm.get_cmap("coolwarm") + + bins = np.linspace(vmin, vmax, n_bins + 1) + b_idx = np.digitize(zs, bins) - 1 + + ax = fplt.create_plot(title=title, rows=1) + if hasattr(ax, "setLabel"): + ax.setLabel("bottom", "log(K/F)") + ax.setLabel("left", "T (years)") + + for bi in range(n_bins): + msk = b_idx == bi + if not np.any(msk): + continue + z_mid = (bins[bi] + bins[bi + 1]) / 2.0 + # map z_mid into [0,1] + t = (z_mid - vmin) / max(vmax - vmin, 1e-12) + rgba = cmap(np.clip(t, 0.0, 1.0)) + color_hex = _rgba_to_hex(rgba) + fplt.plot(xs[msk], ys[msk], ax=ax, style="o", color=color_hex, width=4) + + if show: + fplt.show() + return ax, None + + +# --------------------------------------------------------------------------- +# Pipeline entrypoint for load_data-style frames +# --------------------------------------------------------------------------- + + +def calibrate_from_option_frame( + option_quotes_contracts: pd.DataFrame, + *, + r: float = 0.05, + group_col: str = "expiration_date", + fit_method: str = "least_squares", + plot: bool = True, + plot_backend: str = "matplotlib", + plot_path: Optional[str] = "svi_surface_fit.pdf", + residual_path: Optional[str] = "svi_residuals.pdf", + finplot_show: bool = True, +) -> tuple[pd.DataFrame, SVISurfaceFit, pd.DataFrame]: + """ + Full pipeline: prepare inputs, fit SVI surface, return (prepared_df, fit, params_table). + + ``option_quotes_contracts`` should be the merged quotes frame after + :func:`compute_iv` (columns ``spot``, ``strike``, ``T``, ``iv``, ``expiration_date``, …). + """ + prep = prepare_svi_inputs(option_quotes_contracts, r=r) + fit = fit_svi_surface( + prep, + group_col=group_col, + fit_kwargs={"method": fit_method, "reg_lambda": 0.01}, + ) + params_df = surface_params_dataframe(fit) + if plot and len(fit.slices) > 0: + backend = plot_backend.lower().strip() + fplt = _maybe_import_finplot() + use_finplot = backend in {"finplot", "auto"} and fplt is not None + if backend in {"finplot"} and fplt is None: + raise ImportError("plot_backend='finplot' requested but finplot is not installed.") + + if use_finplot: + # interactive (finplot) + plot_svi_surface_fit_finplot( + prep, fit, group_col=group_col, show=finplot_show + ) + plot_residual_heatmap_finplot(prep, fit, show=finplot_show) + + # still save PDFs if requested (matplotlib backend) + if plot_path: + plot_svi_surface_fit(prep, fit, group_col=group_col, save_path=plot_path, show=False) + if residual_path: + plot_residual_heatmap(prep, fit, save_path=residual_path, show=False) + else: + # file-based (matplotlib) + plot_svi_surface_fit(prep, fit, group_col=group_col, save_path=plot_path, show=False) + plot_residual_heatmap(prep, fit, save_path=residual_path, show=False) + return prep, fit, params_df + diff --git a/src/NewtonSolver.cpp b/src/NewtonSolver.cpp deleted file mode 100644 index eeb97c2..0000000 --- a/src/NewtonSolver.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// -// Created by David Doebel on 13.03.2026. -// - -#include "NewtonSolver.hpp" - - diff --git a/src/PricingEngine.cpp b/src/PricingEngine.cpp deleted file mode 100644 index bdcf247..0000000 --- a/src/PricingEngine.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by David Doebel on 05.03.2026. -// - -#include "PricingEngine.hpp" \ No newline at end of file diff --git a/src/StochasticProcess.cpp b/src/StochasticProcess.cpp deleted file mode 100644 index 52947c3..0000000 --- a/src/StochasticProcess.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by David Doebel on 05.03.2026. -// - -#include "StochasticProcess.hpp" \ No newline at end of file diff --git a/src/VolatilitySurface.cpp b/src/VolatilitySurface.cpp deleted file mode 100644 index c719fbc..0000000 --- a/src/VolatilitySurface.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by David Doebel on 06.03.2026. -// - -#include "VolatilitySurface.hpp" \ No newline at end of file diff --git a/src/VolatilitySurface.hpp b/src/VolatilitySurface.hpp deleted file mode 100644 index 438df63..0000000 --- a/src/VolatilitySurface.hpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Created by David Doebel on 06.03.2026. -// - -#ifndef QUANTENGINE_VOLATILITYSURFACE_HPP -#define QUANTENGINE_VOLATILITYSURFACE_HPP - - -class VolatilitySurface { -public: - virtual ~VolatilitySurface() = default; - virtual double sigma(double K, double T) const = 0; -private: - -}; - - -#endif //QUANTENGINE_VOLATILITYSURFACE_HPP diff --git a/src/YieldCurve.cpp b/src/YieldCurve.cpp deleted file mode 100644 index b4e73c3..0000000 --- a/src/YieldCurve.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by David Doebel on 06.03.2026. -// - -#include "YieldCurve.hpp" \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/config/settings.py b/src/data/config/settings.py deleted file mode 100644 index 49cb6a0..0000000 --- a/src/data/config/settings.py +++ /dev/null @@ -1,14 +0,0 @@ -DB_CONFIG = { - "host": "localhost", - "port": 5432, - "database": "options_db", - "user": "quant_user", - "password": "strong_password", -} - -PIPELINE_CONFIG = { - "symbols": [ - "SPY" - # Example: "SPY" - ] -} \ No newline at end of file diff --git a/src/data/database_interaction.py b/src/data/database_interaction.py index 0d0f9c5..3871c77 100644 --- a/src/data/database_interaction.py +++ b/src/data/database_interaction.py @@ -1,13 +1,15 @@ -import psycopg2 +import pandas as pd -conn = psycopg2.connect( - dbname="options_db", - user="quant_user", - password="strong_password", - host="144.91.73.49", - port="5432" -) +from option_pricing.src.data.ingestion.db_connect import db_engine -cursor = conn.cursor() -cursor.execute("SELECT * FROM underlyings;") -print(cursor.fetchall()) \ No newline at end of file + +def fetch_underlyings() -> pd.DataFrame: + """ + Fetch all entries from the underlyings table using configured DB credentials. + """ + engine = db_engine() + return pd.read_sql("SELECT * FROM underlyings;", engine) + + +if __name__ == "__main__": + print(fetch_underlyings()) \ No newline at end of file diff --git a/src/data/ingestion/config/__init__.py b/src/data/ingestion/config/__init__.py new file mode 100644 index 0000000..3b0c909 --- /dev/null +++ b/src/data/ingestion/config/__init__.py @@ -0,0 +1,3 @@ +from .settings import DB_CONFIG, PIPELINE_CONFIG + +__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"] diff --git a/src/data/ingestion/config/settings.py b/src/data/ingestion/config/settings.py new file mode 100644 index 0000000..863eecd --- /dev/null +++ b/src/data/ingestion/config/settings.py @@ -0,0 +1,31 @@ +import os + + +def _get_env_int(name: str, default: int) -> int: + raw = os.getenv(name) + if raw is None: + return default + try: + return int(raw) + except ValueError as exc: + raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc + + +def _get_env_list(name: str, default: list[str]) -> list[str]: + raw = os.getenv(name) + if not raw: + return default + return [x.strip() for x in raw.split(",") if x.strip()] + + +DB_CONFIG = { + "host": os.getenv("DB_HOST", "localhost"), + "port": _get_env_int("DB_PORT", 5432), + "database": os.getenv("DB_NAME", "options_db"), + "user": os.getenv("DB_USER", "quant_user"), + "password": os.getenv("DB_PASSWORD", ""), +} + +PIPELINE_CONFIG = { + "symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]), +} \ No newline at end of file diff --git a/src/data/ingestion/db_connect.py b/src/data/ingestion/db_connect.py new file mode 100644 index 0000000..fe66920 --- /dev/null +++ b/src/data/ingestion/db_connect.py @@ -0,0 +1,13 @@ +from sqlalchemy import create_engine +from option_pricing.src.data.ingestion.config.settings import DB_CONFIG + +def build_db_url() -> str: + return ( + f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}" + f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}" + ) + +def db_engine(): + db_url = build_db_url() + engine = create_engine(db_url, future=True) + return engine diff --git a/src/data/ingestion/ingest_ubs_comparison.py b/src/data/ingestion/ingest_ubs_comparison.py index 296af06..a8e9b69 100644 --- a/src/data/ingestion/ingest_ubs_comparison.py +++ b/src/data/ingestion/ingest_ubs_comparison.py @@ -1,6 +1,8 @@ from datetime import datetime, timedelta import pandas as pd import yfinance as yf + +from db_connect import db_engine from sqlalchemy import create_engine from sqlalchemy.dialects.postgresql import insert from sqlalchemy import MetaData, Table @@ -10,9 +12,6 @@ TICKERS = ["UBS", "^GSPC"] DAYS_BACK = 31 # ~3 weeks TABLE_NAME = "prices" -DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db" - - def fetch_data(tickers, start_date, end_date): data = yf.download( tickers, @@ -91,7 +90,7 @@ def main(): raw = fetch_data(TICKERS, start_date, end_date) df = transform_data(raw) - engine = create_engine(DB_URI) + engine = db_engine() load_to_postgres(df, engine) print("Ingestion complete.") diff --git a/src/data/ingestion/ingest_yahoo_options.py b/src/data/ingestion/ingest_yahoo_options.py index e4ac989..0b62c0c 100644 --- a/src/data/ingestion/ingest_yahoo_options.py +++ b/src/data/ingestion/ingest_yahoo_options.py @@ -1,11 +1,11 @@ from datetime import datetime, timezone -from decimal import Decimal, InvalidOperation import pandas as pd import yfinance as yf -from sqlalchemy import create_engine, text +from sqlalchemy import text -from config.settings import DB_CONFIG, PIPELINE_CONFIG +from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG +from db_connect import db_engine def build_db_url() -> str: @@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine): def main(): - db_url = build_db_url() - engine = create_engine(db_url, future=True) + engine = db_engine() for symbol in PIPELINE_CONFIG["symbols"]: ingest_symbol(symbol, engine) diff --git a/src/data/load_data.py b/src/data/load_data.py new file mode 100644 index 0000000..9f04dfe --- /dev/null +++ b/src/data/load_data.py @@ -0,0 +1,436 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from option_pricing.src.data.ingestion.db_connect import db_engine +from option_pricing.src.ImpliedVolatility.compute_vls import implied_vol + + +def _normalize_quote_timestamp(df: pd.DataFrame) -> pd.DataFrame: + if "timestamp" not in df.columns and "quote_timestamp" in df.columns: + return df.rename(columns={"quote_timestamp": "timestamp"}) + return df + + +def _normalize_price_timestamp(df: pd.DataFrame) -> pd.DataFrame: + if "timestamp" not in df.columns and "price_timestamp" in df.columns: + return df.rename(columns={"price_timestamp": "timestamp"}) + return df + + +def load_data(): + engine = db_engine() + underlyings = pd.read_sql("SELECT * FROM underlyings;", engine) + underlying_prices = _normalize_price_timestamp( + pd.read_sql("SELECT * FROM underlying_prices;", engine) + ) + option_quotes = _normalize_quote_timestamp(pd.read_sql("SELECT * FROM option_quotes;", engine)) + option_contracts = pd.read_sql("SELECT * FROM option_contracts;", engine) + return underlyings, underlying_prices, option_quotes, option_contracts + + +def clean_data(data: pd.DataFrame): + data.dropna(inplace=True) + data = data[data["volume"] > 0] + data = data[data["open_interest"] > 10] + data["spread"] = data["ask"] - data["bid"] + #data = data[data["spread"] / data["mid"] < 1] + return data + + +def merge_quotes_contracts(option_quotes: pd.DataFrame, option_contracts: pd.DataFrame): + if "timestamp" not in option_quotes.columns: + raise KeyError("option_quotes needs a quote time column ('timestamp' or 'quote_timestamp')") + + option_quotes = option_quotes.groupby(["contract_id", "timestamp"], as_index=False).agg( + { + "bid": "mean", + "ask": "mean", + "mid": "mean", + "last_price": "mean", + "implied_vol": "mean", + "volume": "sum", + "open_interest": "sum", + } + ) + option_quotes = option_quotes.merge( + option_contracts, left_on="contract_id", right_on="id", how="left" + ) + option_quotes["timestamp"] = pd.to_datetime(option_quotes["timestamp"]) + option_quotes["expiration_date"] = pd.to_datetime(option_quotes["expiration_date"]) + option_quotes["T"] = ( + option_quotes["expiration_date"] - option_quotes["timestamp"] + ).dt.total_seconds() / (365 * 24 * 3600) + return option_quotes + + +def compute_iv(option_quotes_contracts, underlying_prices): + df = option_quotes_contracts.copy() + up = _normalize_price_timestamp(underlying_prices.copy()) + + up["timestamp"] = pd.to_datetime(up["timestamp"]) + up = up.sort_values("timestamp").drop_duplicates( + ["underlying_id", "timestamp"], keep="last" + ) + + mask = df["T"] > 0 + if not mask.any(): + df["iv"] = np.nan + return df + + sub = df.loc[mask].copy() + sub["_idx"] = sub.index + + merged = sub.merge( + up[["underlying_id", "timestamp", "price"]], + on=["underlying_id", "timestamp"], + how="left", + validate="many_to_one", + ) + + # assign back using explicit index + df["spot"] = np.nan + df.loc[merged["_idx"], "spot"] = merged["price"].to_numpy() + + price = merged["mid"].to_numpy(dtype=np.float64) + S = merged["price"].to_numpy(dtype=np.float64) + K = merged["strike"].to_numpy(dtype=np.float64) + T = merged["T"].to_numpy(dtype=np.float64) + call = (merged["option_type"] == "call").to_numpy() + + + df["iv"] = np.nan + df.loc[sub.index, "iv"] = implied_vol(price, S, K, T, 0.05, call) + return df + +def fit_ivsimle(option_quotes_contracts): + from scipy.interpolate import UnivariateSpline + sort = option_quotes_contracts.sort_values("log_moneyness").dropna() + x = sort["log_moneyness"] + y = sort["iv"] + y_yahoo = sort["implied_vol"] + print(x,y,y_yahoo) + f = UnivariateSpline(x, y, s=None) + f_yahoo = UnivariateSpline(x, y_yahoo, s=None) + # plot the smile + x_lin = np.linspace(x.min(), x.max(), 200) + plt.plot(x_lin, f(x_lin), label="iv smile") + plt.plot(x_lin, f_yahoo(x_lin), label="yahoo iv smile") + plt.legend() + plt.savefig("iv_smile_fit.pdf") + + + return f + +def calibrate_svi_surface(option_quotes_contracts: pd.DataFrame, r: float = 0.05, **kwargs): + """ + Fit SVI per expiry on ``iv`` from :func:`compute_iv` and plot diagnostics. + + See :func:`option_pricing.src.ImpliedVolatility.svi.calibrate_from_option_frame`. + """ + from option_pricing.src.ImpliedVolatility.svi import calibrate_from_option_frame + + return calibrate_from_option_frame(option_quotes_contracts, r=r, **kwargs) + +def clean_before_svi(option_quotes_contracts: pd.DataFrame): + option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["T"] > 0.05] + return option_quotes_contracts + + +def plot_smoothed_svi_surface(prep: pd.DataFrame, params: pd.DataFrame, r: float = 0.05): + """ + Plot independent slice fits after maturity smoothing. + + Outputs: + - svi_smoothed_surface.pdf + - svi_calendar_violation_heatmap.pdf + """ + from option_pricing.src.ImpliedVolatility.svi import ( + calendar_violation_matrix, + smooth_svi_parameters, + ) + + # Build smoothed maturity-parameter curves from calibrated slice parameters + curves = smooth_svi_parameters( + params, + T_col="T_mean", + smooth_factor_a=1e-4, + smooth_factor_m=1e-4, + smooth_factor_others=0.0, + min_T=0.05, + weight_col="n_points", + ) + + # Overlay market points and smoothed model by maturity + plot_df = prep.copy() + if "T" not in plot_df.columns or "total_var" not in plot_df.columns: + raise KeyError("prep must include columns 'T' and 'total_var'") + + T_grid = np.sort(params.loc[params["success"], "T_mean"].to_numpy(dtype=np.float64)) + if T_grid.size < 2: + return + k_grid = np.linspace( + float(plot_df["log_moneyness"].quantile(0.02)), + float(plot_df["log_moneyness"].quantile(0.98)), + 180, + ) + + plt.figure(figsize=(11, 7)) + cmap = plt.colormaps["viridis"] + nT = max(len(T_grid), 1) + for i, Ti in enumerate(T_grid): + color = cmap(i / max(nT - 1, 1)) if nT > 1 else cmap(0.5) + near = np.isclose(plot_df["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3) + sub = plot_df.loc[near] + if sub.empty: + continue + # market IV points + iv_mkt = np.sqrt( + np.maximum(sub["total_var"].to_numpy(dtype=np.float64), 0.0) + / np.maximum(Ti, 1e-12) + ) + plt.scatter( + sub["log_moneyness"].to_numpy(dtype=np.float64), + iv_mkt, + s=10, + alpha=0.35, + color=color, + ) + # smoothed curve IV + w_model = curves.total_var(k_grid, np.array([Ti], dtype=np.float64))[0] + iv_model = np.sqrt(np.maximum(w_model, 0.0) / np.maximum(Ti, 1e-12)) + plt.plot(k_grid, iv_model, color=color, lw=2, label=f"T={Ti:.3f}") + + plt.xlabel("log moneyness log(K/F)") + plt.ylabel("implied vol") + plt.title("SVI surface: market points vs smoothed maturity curves") + plt.grid(alpha=0.3) + plt.legend(fontsize=8, ncol=2) + plt.tight_layout() + plt.savefig("svi_smoothed_surface.pdf", bbox_inches="tight") + plt.clf() + + # Calendar diagnostics from smoothed surface + cal_diff = calendar_violation_matrix(curves, T_grid, k_grid) + # diff shape: (len(T_grid)-1, len(k_grid)) where negative is violation + plt.figure(figsize=(11, 4)) + im = plt.imshow( + cal_diff, + aspect="auto", + origin="lower", + cmap="coolwarm", + vmin=-0.02, + vmax=0.02, + extent=[k_grid.min(), k_grid.max(), 0, cal_diff.shape[0]], + ) + plt.colorbar(im, label="w(T_{j+1},k)-w(T_j,k)") + plt.xlabel("log moneyness") + plt.ylabel("maturity step j") + plt.title("Calendar diagnostic heatmap (negative = violation)") + plt.tight_layout() + plt.savefig("svi_calendar_violation_heatmap.pdf", bbox_inches="tight") + plt.clf() + + +def _fit_slice_with_svi_py_model( + model: object, + model_name: str, + k: np.ndarray, + w: np.ndarray, + T: float, + *, + theta_ref: float, + prev_params: dict | None, + k_eval: np.ndarray, +) -> tuple[np.ndarray, dict]: + """Fit one slice with a specific pysvi model and evaluate total variance on k_eval.""" + T = float(T) + k = np.asarray(k, dtype=np.float64) + w = np.asarray(w, dtype=np.float64) + k_eval = np.asarray(k_eval, dtype=np.float64) + + # ATM total variance proxy for models requiring theta + theta = float(np.interp(0.0, np.sort(k), w[np.argsort(k)])) + theta = max(theta, 1e-8) + + kwargs: dict = {} + if model_name == "ssvi": + kwargs["theta"] = theta + elif model_name == "essvi": + kwargs["theta"] = theta + kwargs["theta_ref"] = max(float(theta_ref), 1e-8) + elif model_name in {"jumpwings", "jw"}: + kwargs["T"] = max(T, 1e-8) + + # Option B: calendar penalty uses pysvi internal 200-point grid per current slice. + # Build w_prev on that exact grid to avoid shape mismatch. + if prev_params is not None: + k_cal = np.linspace(float(k.min()) - 0.5, float(k.max()) + 0.5, 200) + kwargs["w_prev"] = np.asarray(model.total_variance(k_cal, prev_params), dtype=np.float64) + + params = model.calibrate(k, w, **kwargs) + if params is None: + raise RuntimeError(f"pysvi {model_name} calibration failed") + w_eval = model.total_variance(k_eval, params) + return np.asarray(w_eval, dtype=np.float64), params + + +def compare_vs_svi_py(prep: pd.DataFrame, params: pd.DataFrame): + """ + Compare in-house SVI fit against pysvi models with explicit no-arbitrage flags. + + Outputs: + - svi_vs_pysvi__comparison.pdf for model in {svi, ssvi, essvi, jumpwings} + - svi_vs_pysvi_metrics.csv + """ + from option_pricing.src.ImpliedVolatility.svi import SVIParams + from pysvi import ArbitrageFreedom, get_model + + ok_params = params[params["success"]].copy() + if ok_params.empty: + print("compare_vs_svi_py: no successful in-house slices; skipping.") + return + + k_min = float(prep["log_moneyness"].quantile(0.02)) + k_max = float(prep["log_moneyness"].quantile(0.98)) + k_grid = np.linspace(k_min, k_max, 180) + + models = ["svi", "ssvi", "essvi", "jumpwings"] + rows: list[dict] = [] + + # reference theta for eSSVI from in-house successful slices + theta_ref = float(np.median(ok_params["T_mean"].to_numpy(dtype=np.float64) * 0 + 1.0)) + # Better theta_ref proxy from observed market ATM if available + theta_vals = [] + for _, row in ok_params.iterrows(): + Ti = float(row["T_mean"]) + near = np.isclose(prep["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3) + sub = prep.loc[near].sort_values("log_moneyness") + if len(sub) < 10: + continue + ks = sub["log_moneyness"].to_numpy(dtype=np.float64) + ws = sub["total_var"].to_numpy(dtype=np.float64) + theta_vals.append(float(np.interp(0.0, np.sort(ks), ws[np.argsort(ks)]))) + if theta_vals: + theta_ref = float(np.median(theta_vals)) + + sorted_rows = list(ok_params.sort_values("T_mean").iterrows()) + for model_name in models: + flags = ArbitrageFreedom.NO_BUTTERFLY | ArbitrageFreedom.NO_CALENDAR + model = get_model(model_name, flags) + plt.figure(figsize=(11, 7)) + cmap = plt.colormaps["tab20"] + prev_params = None + n_used = 0 + for _, row in sorted_rows: + Ti = float(row["T_mean"]) + near = np.isclose(prep["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3) + sub = prep.loc[near].sort_values("log_moneyness") + if len(sub) < 10: + continue + k = sub["log_moneyness"].to_numpy(dtype=np.float64) + w = sub["total_var"].to_numpy(dtype=np.float64) + + p_ours = SVIParams( + float(row["a"]), float(row["b"]), float(row["rho"]), float(row["m"]), float(row["sigma"]) + ) + w_ours = p_ours.total_var(k_grid) + rmse_ours = float(np.sqrt(np.mean((p_ours.total_var(k) - w) ** 2))) + + try: + w_ext, ext_params = _fit_slice_with_svi_py_model( + model, + model_name, + k, + w, + Ti, + theta_ref=theta_ref, + prev_params=prev_params, + k_eval=k_grid, + ) + rmse_ext = float(np.sqrt(np.mean((np.interp(k, k_grid, w_ext) - w) ** 2))) + rows.append( + { + "model": model_name, + "T_mean": Ti, + "rmse_ours": rmse_ours, + "rmse_pysvi": rmse_ext, + "delta_rmse": rmse_ext - rmse_ours, + "ext_params": str(ext_params), + } + ) + color = cmap(n_used % 20) + n_used += 1 + plt.plot(k_grid, np.sqrt(np.maximum(w_ours, 0) / max(Ti, 1e-12)), color=color, lw=2, alpha=0.9) + plt.plot(k_grid, np.sqrt(np.maximum(w_ext, 0) / max(Ti, 1e-12)), color=color, lw=1.5, ls="--", alpha=0.9) + prev_params = ext_params + except Exception as exc: + print(f"compare_vs_svi_py[{model_name}]: skipping T={Ti:.4f}, reason: {exc}") + continue + + if n_used == 0: + plt.close() + continue + + plt.xlabel("log moneyness") + plt.ylabel("implied vol") + plt.title(f"In-house SVI (solid) vs pysvi {model_name} (dashed)") + plt.grid(alpha=0.3) + plt.tight_layout() + plt.savefig(f"svi_vs_pysvi_{model_name}_comparison.pdf", bbox_inches="tight") + plt.clf() + + out = pd.DataFrame(rows) + if out.empty: + print("compare_vs_svi_py: no slices compared (pysvi unavailable or incompatible).") + return + out = out.sort_values(["model", "T_mean"]) + out.to_csv("svi_vs_pysvi_metrics.csv", index=False) + print(out.groupby("model")[["rmse_ours", "rmse_pysvi", "delta_rmse"]].mean()) + + +def plot_ivsmile(option_quotes_contracts): + option_quotes_contracts = option_quotes_contracts.sort_values("strike") + option_quotes_contracts["log_moneyness"] = np.log( + option_quotes_contracts["spot"] * np.exp(0.05 * option_quotes_contracts["T"])/option_quotes_contracts["strike"] + ) + option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["log_moneyness"].abs() < 0.2] + #option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["mid"] > 0.2] + plt.plot(option_quotes_contracts["strike"], option_quotes_contracts["iv"], label="iv smile") + plt.plot(option_quotes_contracts["strike"], option_quotes_contracts["implied_vol"], label="i. vol") + plt.legend() + plt.savefig("iv_smile.pdf") + plt.xlabel("iv") + plt.ylabel("strike price") + plt.clf() + return option_quotes_contracts + + + + + +if __name__ == "__main__": + underlyings, underlying_prices, option_quotes, option_contracts = load_data() + option_quotes_contracts = merge_quotes_contracts(option_quotes, option_contracts) + option_quotes_contracts = clean_data(option_quotes_contracts) + option_quotes_contracts = compute_iv(option_quotes_contracts, underlying_prices) + mask = option_quotes_contracts["iv"].notna() + print(option_quotes_contracts) + print(option_quotes_contracts.columns) + #plt.plot(option_quotes_contracts["contract_id"][mask], option_quotes_contracts["implied_vol"][mask], label="i. iv") + #plt.plot(option_quotes_contracts["contract_id"][mask],option_quotes_contracts["iv"][mask], label="comp. iv") + #plt.legend() + #plt.show() + option_quotes_contracts = plot_ivsmile(option_quotes_contracts) + fit_ivsimle(option_quotes_contracts) + prep, svi_fit, params = calibrate_svi_surface( + clean_before_svi(option_quotes_contracts), + r=0.05, + plot_backend="matplotlib", + finplot_show=True, + # optionally: plot_path=None to avoid matplotlib PDF output + ) + print(svi_fit) + plot_smoothed_svi_surface(prep, params, r=0.05) + compare_vs_svi_py(prep, params) + diff --git a/standalone_numerical_experiments/local_volatility_instability/INDEPENDENT_STANDALONE.txt b/standalone_numerical_experiments/local_volatility_instability/INDEPENDENT_STANDALONE.txt new file mode 100644 index 0000000..58fc565 --- /dev/null +++ b/standalone_numerical_experiments/local_volatility_instability/INDEPENDENT_STANDALONE.txt @@ -0,0 +1,6 @@ +This folder is intentionally self-contained. + +- No imports from the parent option_pricing package (no qengine, src/, cpp bindings). +- Third-party dependencies: numpy, matplotlib (see requirements.txt). +- Run: python run_experiment.py [--out lv_rmse.png] +- Safe to copy elsewhere or run in isolation. diff --git a/standalone_numerical_experiments/local_volatility_instability/figures/lv_relerr.png b/standalone_numerical_experiments/local_volatility_instability/figures/lv_relerr.png new file mode 100644 index 0000000..9ef14f3 Binary files /dev/null and b/standalone_numerical_experiments/local_volatility_instability/figures/lv_relerr.png differ diff --git a/standalone_numerical_experiments/local_volatility_instability/figures/lv_rmse.png b/standalone_numerical_experiments/local_volatility_instability/figures/lv_rmse.png new file mode 100644 index 0000000..e617d33 Binary files /dev/null and b/standalone_numerical_experiments/local_volatility_instability/figures/lv_rmse.png differ diff --git a/standalone_numerical_experiments/local_volatility_instability/figures/lv_sigma2.png b/standalone_numerical_experiments/local_volatility_instability/figures/lv_sigma2.png new file mode 100644 index 0000000..e525355 Binary files /dev/null and b/standalone_numerical_experiments/local_volatility_instability/figures/lv_sigma2.png differ diff --git a/standalone_numerical_experiments/local_volatility_instability/gatheral_local_vol.py b/standalone_numerical_experiments/local_volatility_instability/gatheral_local_vol.py new file mode 100644 index 0000000..19db847 --- /dev/null +++ b/standalone_numerical_experiments/local_volatility_instability/gatheral_local_vol.py @@ -0,0 +1,108 @@ +""" +Gatheral local variance in total-variance / log-moneyness form (practitioner's guide). + +sigma^2 = (d_T w) / ( 1 - (y/w) d_y w + + (1/4)(-1/4 - 1/w + y^2/w^2) (d_y w)^2 + + (1/2) d_yy w ) + +where w = omega is total implied variance, y is log-moneyness (convention as in the note). +""" + +from __future__ import annotations + +import numpy as np + + +def local_variance_from_derivatives( + y: np.ndarray, + w: np.ndarray, + dy_w: np.ndarray, + dyy_w: np.ndarray, + dT_w: np.ndarray, + *, + eps: float = 1e-14, +) -> np.ndarray: + """Vectorized Gatheral formula. Invalid / near-singular points become nan.""" + y = np.asarray(y, dtype=float) + w = np.asarray(w, dtype=float) + dy_w = np.asarray(dy_w, dtype=float) + dyy_w = np.asarray(dyy_w, dtype=float) + dT_w = np.asarray(dT_w, dtype=float) + + out = np.full_like(y, np.nan, dtype=float) + ok = np.isfinite(w) & (np.abs(w) > eps) & np.isfinite(dy_w) & np.isfinite(dyy_w) & np.isfinite(dT_w) + + denom = np.empty_like(w) + denom[ok] = ( + 1.0 + - (y[ok] / w[ok]) * dy_w[ok] + + 0.25 * (-0.25 - 1.0 / w[ok] + (y[ok] ** 2) / (w[ok] ** 2)) * (dy_w[ok] ** 2) + + 0.5 * dyy_w[ok] + ) + + ok2 = ok & (np.abs(denom) > eps) + out[ok2] = dT_w[ok2] / denom[ok2] + return out + + +def quadratic_total_variance( + y: np.ndarray, + alpha: float, + beta: float, + gamma: float, + T: float, +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + w(y,T) = T * (alpha + beta*y + gamma*y^2), with derivatives as in the note: + + d_T w = alpha + beta*y + gamma*y^2 + d_y w = T * (beta + 2*gamma*y) + d_yy w = 2*gamma*T + """ + y = np.asarray(y, dtype=float) + f = alpha + beta * y + gamma * y ** 2 + w = T * f + dT_w = f + dy_w = T * (beta + 2.0 * gamma * y) + dyy_w = np.full_like(y, 2.0 * gamma * T) + return w, dT_w, dy_w, dyy_w + + +def analytic_local_variance_quadratic( + y: np.ndarray, + alpha: float, + beta: float, + gamma: float, + T: float, +) -> np.ndarray: + """Closed form from the note (equivalent to plugging derivatives into Gatheral).""" + y = np.asarray(y, dtype=float) + w, dT_w, dy_w, dyy_w = quadratic_total_variance(y, alpha, beta, gamma, T) + return local_variance_from_derivatives(y, w, dy_w, dyy_w, dT_w) + + +def central_first_derivative_uniform(w: np.ndarray, h: float) -> np.ndarray: + """Interior (w[i+1]-w[i-1])/(2h); endpoints nan.""" + w = np.asarray(w, dtype=float) + out = np.full_like(w, np.nan) + out[1:-1] = (w[2:] - w[:-2]) / (2.0 * h) + return out + + +def second_derivative_uniform(w: np.ndarray, h: float) -> np.ndarray: + """Interior second difference / h^2; endpoints nan.""" + w = np.asarray(w, dtype=float) + out = np.full_like(w, np.nan) + out[1:-1] = (w[2:] - 2.0 * w[1:-1] + w[:-2]) / (h ** 2) + return out + + +def add_multiplicative_noise( + w: np.ndarray, + sigma_noise: float, + rng: np.random.Generator, +) -> np.ndarray: + """tilde w(y_i) = w(y_i) * (1 + eps), eps ~ N(0, sigma_noise^2).""" + w = np.asarray(w, dtype=float) + eps = rng.normal(0.0, sigma_noise, size=w.shape) + return w * (1.0 + eps) diff --git a/standalone_numerical_experiments/local_volatility_instability/lv_rmse.png b/standalone_numerical_experiments/local_volatility_instability/lv_rmse.png new file mode 100644 index 0000000..b2a75e7 Binary files /dev/null and b/standalone_numerical_experiments/local_volatility_instability/lv_rmse.png differ diff --git a/standalone_numerical_experiments/local_volatility_instability/requirements.txt b/standalone_numerical_experiments/local_volatility_instability/requirements.txt new file mode 100644 index 0000000..337f34a --- /dev/null +++ b/standalone_numerical_experiments/local_volatility_instability/requirements.txt @@ -0,0 +1,2 @@ +numpy>=1.20 +matplotlib>=3.5 diff --git a/standalone_numerical_experiments/local_volatility_instability/run_experiment.py b/standalone_numerical_experiments/local_volatility_instability/run_experiment.py new file mode 100644 index 0000000..cb6b3f2 --- /dev/null +++ b/standalone_numerical_experiments/local_volatility_instability/run_experiment.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +Local-volatility instability experiment (Gatheral total variance in log-moneyness). + +We compare the analytic local variance σ²(y) from a quadratic total variance +w(y,T) = T(α + βy + γy²) to σ² reconstructed from a noisy discrete surface +w̃(y_i) = w(y_i)(1 + ε_i) using finite differences in y, for several levels of +multiplicative noise σ_noise. This script only produces the figure: RMSE of the +FD reconstruction vs σ_noise (log–log), with a y = σ reference line of slope 1. + +Dependencies: numpy, matplotlib only (see INDEPENDENT_STANDALONE.txt). +""" + +from __future__ import annotations + +import argparse +import os +import sys +from typing import Literal + +# Prevent accidental imports from the parent repository +_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _REPO_ROOT in sys.path: + sys.path.remove(_REPO_ROOT) + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np + +from gatheral_local_vol import ( + add_multiplicative_noise, + analytic_local_variance_quadratic, + central_first_derivative_uniform, + local_variance_from_derivatives, + quadratic_total_variance, + second_derivative_uniform, +) + +# --------------------------------------------------------------------------- +# Defaults (quadratic total variance, positive w on y ∈ [-0.5, 0.5]) +# --------------------------------------------------------------------------- + +ALPHA = 0.04 +BETA = 0.0 +GAMMA = 0.1 +T_MATURITY = 1.0 +Y_MIN = -0.5 +Y_MAX = 0.5 +N_GRID = 201 + + +def ensure_parent_dir(path: str) -> None: + parent = os.path.dirname(os.path.abspath(path)) + if parent: + os.makedirs(parent, exist_ok=True) + + +def log_uniform_sigma_grid(n_points: int, sigma_min: float, sigma_max: float) -> np.ndarray: + """ + Return `n_points` values of σ_noise with log₁₀(σ) equally spaced. + + This is the correct sampling for a log–log RMSE plot; it is not linspace(σ_min, σ_max). + """ + n_points = max(4, n_points) + if sigma_min <= 0 or sigma_max <= 0 or sigma_max < sigma_min: + raise ValueError("Require 0 < sigma_min <= sigma_max.") + return np.logspace(np.log10(sigma_min), np.log10(sigma_max), n_points) + + +def relative_pointwise_error( + sigma2_analytic: np.ndarray, sigma2_fd: np.ndarray, eps: float = 1e-12 +) -> np.ndarray: + return (sigma2_fd - sigma2_analytic) / np.maximum(np.abs(sigma2_analytic), eps) + + +def rmse_absolute( + sigma2_analytic: np.ndarray, + sigma2_fd: np.ndarray, + interior: slice, +) -> float: + """RMSE of (σ²_FD − σ²_analytic) on interior indices.""" + sa = np.asarray(sigma2_analytic, dtype=float)[interior] + sf = np.asarray(sigma2_fd, dtype=float)[interior] + m = np.isfinite(sa) & np.isfinite(sf) + if not np.any(m): + return float("nan") + d = sf[m] - sa[m] + return float(np.sqrt(np.mean(d * d))) + + +def rmse_relative( + sigma2_analytic: np.ndarray, + sigma2_fd: np.ndarray, + interior: slice, + eps: float = 1e-12, +) -> float: + """RMSE over grid points of relative error (σ²_FD − σ²_analytic) / |σ²_analytic|.""" + re = relative_pointwise_error(sigma2_analytic, sigma2_fd, eps=eps)[interior] + m = np.isfinite(re) + if not np.any(m): + return float("nan") + return float(np.sqrt(np.mean(re[m] ** 2))) + + +def local_variance_one_draw( + y: np.ndarray, + h: float, + alpha: float, + beta: float, + gamma: float, + T: float, + sigma_noise: float, + rng: np.random.Generator, + dT_mode: Literal["exact", "noisy_ratio"], +) -> tuple[np.ndarray, np.ndarray]: + """One noisy surface and FD local variance; returns (σ²_analytic, σ²_FD).""" + w_true, dT_w_true, _, _ = quadratic_total_variance(y, alpha, beta, gamma, T) + sigma2_a = analytic_local_variance_quadratic(y, alpha, beta, gamma, T) + + w_tilde = add_multiplicative_noise(w_true, sigma_noise, rng) + dy = central_first_derivative_uniform(w_tilde, h) + dyy = second_derivative_uniform(w_tilde, h) + + if dT_mode == "exact": + dT = dT_w_true + elif dT_mode == "noisy_ratio": + dT = w_tilde / T + else: + raise ValueError(dT_mode) + + sigma2_fd = local_variance_from_derivatives(y, w_tilde, dy, dyy, dT) + return sigma2_a, sigma2_fd + + +def rmse_curves_averaged( + y: np.ndarray, + h: float, + alpha: float, + beta: float, + gamma: float, + T: float, + sigma_grid: np.ndarray, + rng: np.random.Generator, + dT_mode: Literal["exact", "noisy_ratio"], + interior: slice, + trials_per_sigma: int, +) -> tuple[np.ndarray, np.ndarray]: + """ + For each σ in `sigma_grid`, average RMSE (relative and absolute) over + `trials_per_sigma` independent noise draws. + """ + rel: list[float] = [] + abs_: list[float] = [] + trials_per_sigma = max(1, trials_per_sigma) + + for sig in sigma_grid: + tr: list[float] = [] + ta: list[float] = [] + for _ in range(trials_per_sigma): + sa, sf = local_variance_one_draw( + y, h, alpha, beta, gamma, T, float(sig), rng, dT_mode + ) + tr.append(rmse_relative(sa, sf, interior)) + ta.append(rmse_absolute(sa, sf, interior)) + rel.append(float(np.nanmean(tr))) + abs_.append(float(np.nanmean(ta))) + + return np.asarray(rel, dtype=float), np.asarray(abs_, dtype=float) + + +def plot_rmse_vs_noise( + sigma_grid: np.ndarray, + rmse_rel: np.ndarray, + rmse_abs: np.ndarray, + *, + h: float, + T: float, + dT_mode: str, + trials_per_sigma: int, +) -> mpl.figure.Figure: + """ + Log–log plot: RMSE (relative and absolute in σ²) vs σ_noise, reference y = σ. + """ + fig, ax = plt.subplots(figsize=(5.8, 3.8), constrained_layout=True) + + x = np.asarray(sigma_grid, dtype=float) + pos = x > 0 + n = len(x) + ms = 3.5 if n > 50 else 4.5 + + ax.loglog( + x[pos], + rmse_rel[pos], + "o-", + ms=ms, + lw=1.25, + label=r"RMSE of relative error $(\sigma^2_{\mathrm{FD}}-\sigma^2_{\mathrm{nat}})/|\sigma^2_{\mathrm{nat}}|$", + zorder=3, + ) + ax.loglog( + x[pos], + rmse_abs[pos], + "s--", + ms=ms - 1, + lw=1.0, + alpha=0.9, + label=r"RMSE of $\sigma^2$ error $|\sigma^2_{\mathrm{FD}}-\sigma^2_{\mathrm{nat}}|$", + zorder=2, + ) + + s_lo, s_hi = float(x[pos].min()), float(x[pos].max()) + ax.loglog([s_lo, s_hi], [s_lo, s_hi], ":", color="0.4", lw=2.0, zorder=1, label=r"reference slope 1: $y=\sigma_{\mathrm{noise}}$") + + ax.set_xlabel(r"$\sigma_{\mathrm{noise}}$ (multiplicative noise on $\tilde{w}$)") + ax.set_ylabel("RMSE (interior $y$)") + subtitle = f"$T={T}$, $h={h:.4f}$, $\\partial_T w$: {dT_mode}" + if trials_per_sigma > 1: + subtitle += f", mean over {trials_per_sigma} draws per $\\sigma$" + ax.set_title("FD local variance: RMSE vs noise\n" + subtitle, fontsize=10) + ax.grid(True, which="both", alpha=0.35) + ax.legend(loc="best", fontsize=8, framealpha=0.95) + + return fig + + +def configure_matplotlib_style() -> None: + """Conservative defaults suitable for print.""" + mpl.rcParams.update( + { + "figure.dpi": 120, + "savefig.dpi": 300, + "font.size": 10, + "axes.labelsize": 10, + "axes.titlesize": 10, + "legend.fontsize": 8, + "axes.grid": True, + } + ) + + +def main() -> None: + configure_matplotlib_style() + + parser = argparse.ArgumentParser( + description="RMSE of finite-difference local variance vs multiplicative noise (single figure).", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--seed", type=int, default=42, help="RNG seed.") + parser.add_argument( + "--out", + type=str, + default="lv_rmse.png", + help="Output image path.", + ) + parser.add_argument( + "--dT-mode", + choices=("exact", "noisy_ratio"), + default="exact", + help="Treatment of ∂_T w when w is replaced by noisy w̃ on the grid.", + ) + parser.add_argument("--rmse-points", type=int, default=35, help="Number of σ_noise values (log-uniform).") + parser.add_argument("--rmse-sigma-min", type=float, default=1e-5, help="Smallest σ_noise.") + parser.add_argument("--rmse-sigma-max", type=float, default=5e-4, help="Largest σ_noise.") + parser.add_argument( + "--rmse-trials", + type=int, + default=50, + help="Independent noisy surfaces per σ_noise; RMSE is averaged.", + ) + args = parser.parse_args() + + rng = np.random.default_rng(args.seed) + y = np.linspace(Y_MIN, Y_MAX, N_GRID) + h = float(y[1] - y[0]) + interior = slice(1, -1) + + sigma_grid = log_uniform_sigma_grid(args.rmse_points, args.rmse_sigma_min, args.rmse_sigma_max) + rmse_rel, rmse_abs = rmse_curves_averaged( + y, + h, + ALPHA, + BETA, + GAMMA, + T_MATURITY, + sigma_grid, + rng, + args.dT_mode, + interior, + args.rmse_trials, + ) + + fig = plot_rmse_vs_noise( + sigma_grid, + rmse_rel, + rmse_abs, + h=h, + T=T_MATURITY, + dT_mode=args.dT_mode, + trials_per_sigma=args.rmse_trials, + ) + + ensure_parent_dir(args.out) + fig.savefig(args.out, bbox_inches="tight") + print(f"Wrote {args.out}") + plt.close(fig) + + +if __name__ == "__main__": + main() diff --git a/tests/test_black_scholes.cpp b/tests/test_black_scholes.cpp index b9f0f01..725acbe 100644 --- a/tests/test_black_scholes.cpp +++ b/tests/test_black_scholes.cpp @@ -3,6 +3,7 @@ // #include +#include "BlackScholesClosedFormEngine.hpp" #include "BlackScholesProcess.hpp" #include "MonteCarloEngine.hpp" #include "Instrument.hpp" @@ -51,3 +52,28 @@ TEST(BlackScholesProcess, ExpectedValue) { ASSERT_NEAR(callPrice, callGT, tol); ASSERT_NEAR(putPrice, putGT, tol); } + +TEST(BlackScholesClosedForm, MatchesReference) { + const double K = 100.0; + const double T = 1.0; + + const MarketData marketData( + 100.0, + std::make_shared(0.01), + std::make_shared(0.2)); + + auto processCall = std::make_unique(marketData); + auto processPut = std::make_unique(marketData); + + auto analyticCall = std::make_unique(std::move(processCall)); + auto analyticPut = std::make_unique(std::move(processPut)); + + Instrument callInstr(T, std::make_unique(K), std::move(analyticCall)); + Instrument putInstr(T, std::make_unique(K), std::move(analyticPut)); + + const double callGT = 8.4333186901; + const double putGT = 7.4383020650; + + ASSERT_NEAR(callInstr.price(), callGT, 1e-9); + ASSERT_NEAR(putInstr.price(), putGT, 1e-9); +}