RV Project

2. Data Audit

Show code
# 2.1 Imports and paths
from pathlib import Path
import itertools
import json
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from tabulate import tabulate

# Wide-format parquet holds all series aligned by date; this is a statistical data audit, not a tradeable panel.
input_path = "data/combined/all_datasets_wide.parquet"
# Derived audit artifacts (parquet/csv/markdown) land here; create the directory up front so later writes cannot fail.
out_dir = Path("data/derived")
out_dir.mkdir(parents=True, exist_ok=True)


def show_table(df_table: pd.DataFrame) -> None:
    """Pretty-print a DataFrame to stdout as a psql-style grid, hiding the index."""
    rendered = tabulate(df_table, headers="keys", tablefmt="psql", showindex=False)
    print(rendered)

def write_df_csv_and_md(
    df: pd.DataFrame, csv_path: Path, md_path: Path, float_round: int = 6
) -> None:
    """Persist *df* twice: full precision CSV and a Markdown table with rounded numerics.

    Only numeric columns are rounded (to *float_round* decimals) and only in the
    Markdown rendering; the CSV keeps the original values. Neither write includes the index.
    """
    df.to_csv(csv_path, index=False)
    rounded = df.copy()
    numeric_cols = rounded.select_dtypes(include=["number"]).columns
    rounded[numeric_cols] = rounded[numeric_cols].round(float_round)
    md_path.write_text(rounded.to_markdown(index=False), encoding="utf-8")


def ensure_naive_dates(index: pd.Index | pd.DatetimeIndex) -> pd.DatetimeIndex:
    idx = pd.to_datetime(index, errors="coerce")
    if getattr(idx, "tz", None) is not None:
        # Drop timezone without shifting calendar dates.
        idx = idx.tz_localize(None)
    return idx


def compute_missing_streaks(series: pd.Series, index: pd.DatetimeIndex) -> pd.DataFrame:
    """Return one row per contiguous run of NaNs in *series*.

    Each row records the run's start/end dates from *index* (matched positionally),
    its calendar length in days (inclusive), and its length in observations.
    Returns an empty DataFrame when there are no missing runs.
    """
    missing_flags = series.isna().to_numpy()
    streaks = []
    pos = 0
    for flag, run in itertools.groupby(missing_flags):
        run_len = sum(1 for _ in run)
        if flag:
            first = index[pos]
            last = index[pos + run_len - 1]
            streaks.append(
                {
                    "segment_start": first,
                    "segment_end": last,
                    # Inclusive calendar span, so a single missing day counts as 1.
                    "length_days": (last - first).days + 1,
                    "length_business_days": int(run_len),
                }
            )
        pos += run_len
    return pd.DataFrame(streaks)


def apply_causality_shift(
    refit_wide: pd.DataFrame, trade_index: pd.DatetimeIndex, label: str
) -> tuple[pd.DataFrame, pd.Series]:
    """Project refit-dated outputs onto a daily trading calendar with a one-day lag.

    Refit rows are forward-filled onto *trade_index* and then shifted by one
    observation so a trade date only sees t-1 information. The second return
    value maps each surviving daily row back to the refit date it came from.
    Raises ValueError on duplicate refit dates or a non-monotonic result.
    """
    if refit_wide.index.has_duplicates:
        raise ValueError(f"{label}: refit index has duplicates; cannot apply causality shift safely.")
    calendar = ensure_naive_dates(trade_index).sort_values()
    refit_sorted = refit_wide.sort_index()

    # Forward-fill refit outputs to daily, then shift 1 observation to enforce t-1 information.
    lagged_daily = refit_sorted.reindex(calendar).ffill().shift(1)
    source_dates = pd.Series(refit_sorted.index, index=refit_sorted.index, name="refit_date")
    lagged_sources = source_dates.reindex(calendar).ffill().shift(1)

    # Drop warm-up rows with any NaN and keep the marker series on the same index.
    lagged_daily = lagged_daily.dropna(axis=0, how="any")
    lagged_sources = lagged_sources.reindex(lagged_daily.index)

    if len(lagged_daily) and not lagged_daily.index.is_monotonic_increasing:
        raise ValueError(f"{label}: daily index not monotonic after shift.")
    return lagged_daily, lagged_sources


def is_treasury_par_curve_tenor(name: str) -> bool:
    """True when *name* matches '<digits>_<unit>' with unit mo/yr/month/months (case-insensitive)."""
    head, sep, tail = str(name).lower().partition("_")
    # Require exactly one underscore: no separator, or a second one in the tail, both fail.
    if not sep or "_" in tail:
        return False
    return head.isdigit() and tail in {"mo", "yr", "month", "months"}


def tenor_to_years(tenor: str) -> float:
    """Map a supported curve tenor label to its maturity in years.

    Raises ValueError for any tenor outside the eight supported core-curve labels.
    """
    years_by_tenor = {
        "3_mo": 0.25,
        "6_mo": 0.50,
        "1_yr": 1.00,
        "2_yr": 2.00,
        "3_yr": 3.00,
        "5_yr": 5.00,
        "7_yr": 7.00,
        "10_yr": 10.00,
    }
    try:
        return float(years_by_tenor[tenor])
    except KeyError:
        raise ValueError(f"Unsupported tenor for duration mapping: {tenor!r}") from None


def modified_duration_from_yield(y_decimal: float, maturity_years: float) -> float:
    """Modified duration (in years) of a par bond at yield *y_decimal* (decimal, not percent).

    Sub-annual maturities use a zero-coupon style approximation T / (1 + y).
    Coupon bonds use annual compounding at exactly one year and semiannual
    beyond. Returns NaN for non-finite yields or degenerate discounting.
    """
    if not np.isfinite(y_decimal):
        return np.nan

    # Money-market style shortcut for sub-annual tenors.
    if maturity_years < 1.0:
        growth = 1.0 + y_decimal
        return float(maturity_years / growth) if growth > 0 else np.nan

    face_value = 100.0
    freq = 1 if maturity_years == 1.0 else 2
    periodic_yield = y_decimal / freq
    growth = 1.0 + periodic_yield
    if growth <= 0:
        return np.nan
    n_periods = int(round(maturity_years * freq))
    if n_periods <= 0:
        return np.nan
    coupon = face_value * y_decimal / freq

    # Present value of each cash flow, paired with its time in years.
    pv_flows = [
        (i / freq, (coupon + (face_value if i == n_periods else 0.0)) / growth**i)
        for i in range(1, n_periods + 1)
    ]
    price = sum(pv for _, pv in pv_flows)
    if price <= 0:
        return np.nan
    macaulay = sum(t * pv for t, pv in pv_flows) / price
    return float(macaulay / growth)


def build_duration_panel(
    df_yields_percent: pd.DataFrame, tenors: list[str]
) -> pd.DataFrame:
    """Per-date modified-duration proxies for each requested tenor.

    *df_yields_percent* holds yields in percent; they are converted to decimals
    before pricing. Missing yields propagate as NaN durations.
    """
    panel = pd.DataFrame(index=df_yields_percent.index, columns=tenors, dtype="float64")
    for tenor in tenors:
        years = tenor_to_years(tenor)
        yields_pct = df_yields_percent[tenor].astype("float64")
        # Bind maturity via a default argument so each column uses its own value.
        panel[tenor] = yields_pct.apply(
            lambda y, m=years: np.nan if pd.isna(y) else modified_duration_from_yield(y / 100.0, m)
        )
    return panel


def build_dv01_returns(
    df_yields: pd.DataFrame, dur_input: pd.Series | pd.DataFrame
) -> tuple[pd.DataFrame, pd.DataFrame]:
    # Output is a dimensionless first-order price return proxy from yield changes using a duration proxy in years.
    # It is not a dollar DV01, not a bond total return, and not a tradeable instrument PnL.
    dy_decimal = df_yields.diff().div(100.0)
    if isinstance(dur_input, pd.Series):
        dur_vec = dur_input.reindex(df_yields.columns)
        missing = dur_vec[dur_vec.isna()].index.tolist()
        if missing:
            raise ValueError(f"Missing duration assumptions for tenors: {missing}")
        duration_scaled_return_proxy = dy_decimal.mul(-dur_vec, axis=1)
    else:
        dur_panel = dur_input.reindex(index=df_yields.index, columns=df_yields.columns)
        dur_panel = dur_panel.shift(1)
        duration_scaled_return_proxy = dy_decimal * (-dur_panel)
    dy_decimal = dy_decimal.dropna(axis=0, how="any")
    duration_scaled_return_proxy = (
        duration_scaled_return_proxy.reindex(dy_decimal.index).dropna(axis=0, how="any")
    )
    dy_decimal = dy_decimal.reindex(duration_scaled_return_proxy.index)
    return dy_decimal, duration_scaled_return_proxy


# Units note for the duration scaled return proxy
# If duration is 5 years and yield rises by 1 bp then dy_decimal is 0.0001 and the proxy return is about -0.0005,
# which is about -5 bp in price return terms.


def pca_svd(Xc: np.ndarray, k: int = 3) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """PCA of a pre-centered matrix via thin SVD.

    Returns (loadings: k x n_vars, scores: n_obs x k, explained variance ratio: k).
    Raises ValueError for non-2D input or fewer than 2 observations.
    """
    if Xc.ndim != 2:
        raise ValueError("Xc must be 2D array")
    n_obs = Xc.shape[0]
    if n_obs < 2:
        raise ValueError("Need at least 2 observations for PCA")
    U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
    top_loadings = Vt[:k]
    top_scores = U[:, :k] * S[:k]
    # Sample variance per component; the ratio denominator spans ALL components.
    component_var = np.square(S) / max(n_obs - 1, 1)
    explained_ratio = component_var[:k] / component_var.sum()
    return top_loadings, top_scores, explained_ratio


def align_signs(loadings: np.ndarray, tenors: list[str]) -> np.ndarray:
    """Orient the first three PCs to level-up / slope-up / curvature-up conventions.

    Operates on a copy; the input array is never mutated. Sign checks that need
    specific tenors are skipped when those tenors are absent.
    """
    oriented = loadings.copy()
    position = {t: i for i, t in enumerate(tenors)}

    # PC1 (level): make the average loading positive.
    if np.nanmean(oriented[0]) < 0:
        oriented[0] = -oriented[0]

    short_i = position.get("3_mo")
    long_i = position.get("10_yr")
    mid_i = position.get("5_yr")

    # PC2 (slope): long-end loading should exceed the short end.
    if short_i is not None and long_i is not None:
        if oriented[1, long_i] - oriented[1, short_i] < 0:
            oriented[1] = -oriented[1]

    # PC3 (curvature): wings should sit above the belly.
    if short_i is not None and long_i is not None and mid_i is not None:
        avg_wings = 0.5 * (oriented[2, short_i] + oriented[2, long_i])
        if avg_wings - oriented[2, mid_i] < 0:
            oriented[2] = -oriented[2]

    return oriented


def _match_and_orient_impl(
    loadings_now: np.ndarray, loadings_prev: np.ndarray | None
) -> tuple[np.ndarray, str, list[bool], list[float]]:
    """Match current PCA components to a previous window's and fix their signs.

    Searches all component permutations for the one maximizing total absolute
    inner products against *loadings_prev*, then flips any component whose
    similarity is negative. Returns (aligned, perm string 'i-j-...', per-component
    flip flags, per-component similarities). With no previous window the input
    is returned unchanged with an identity permutation and NaN similarities.
    """
    if loadings_now.ndim != 2:
        raise ValueError("loadings_now must be a 2D array")
    if loadings_prev is None:
        n_comp = loadings_now.shape[0]
        identity = "-".join(str(i) for i in range(n_comp))
        return loadings_now.copy(), identity, [False] * n_comp, [np.nan] * n_comp

    if loadings_prev.ndim != 2:
        raise ValueError("loadings_prev must be a 2D array")
    if loadings_prev.shape != loadings_now.shape:
        raise ValueError(
            "loadings_prev and loadings_now must have matching shapes; "
            f"got {loadings_prev.shape} and {loadings_now.shape}"
        )

    n_comp = loadings_now.shape[0]
    if n_comp == 0:
        return loadings_now.copy(), "", [], []

    def total_abs_similarity(perm: tuple[int, ...]) -> float:
        return sum(
            abs(float(np.dot(loadings_now[src], loadings_prev[dst])))
            for dst, src in enumerate(perm)
        )

    # max() keeps the first maximizer, matching a strict-greater scan in order.
    best_perm = max(itertools.permutations(range(n_comp)), key=total_abs_similarity)

    aligned = loadings_now[list(best_perm), :].copy()
    flip_flags: list[bool] = []
    sims: list[float] = []
    for row, ref in zip(aligned, loadings_prev):
        similarity = float(np.dot(row, ref))
        flipped = similarity < 0
        if flipped:
            row *= -1  # in-place: mutates the corresponding row of `aligned`
            similarity = -similarity
        flip_flags.append(flipped)
        sims.append(similarity)

    return aligned, "-".join(str(i) for i in best_perm), flip_flags, sims


def _parse_perm_used(perm_used: str, k: int) -> list[int]:
    """Decode an 'i-j-...' permutation string into a list of ints.

    An empty string means the identity permutation; k == 0 yields [].
    Raises ValueError on a length mismatch or an invalid permutation.
    """
    if k == 0:
        return []
    if not perm_used:
        return list(range(k))
    tokens = perm_used.split("-")
    if len(tokens) != k:
        raise ValueError(f"perm_used has length {len(tokens)} but expected {k}")
    decoded = [int(tok) for tok in tokens]
    if sorted(decoded) != list(range(k)):
        raise ValueError(f"perm_used is not a valid permutation: {perm_used}")
    return decoded


def _apply_component_alignment(
    arr: np.ndarray, perm: list[int], flip_flags: list[bool] | None = None
) -> np.ndarray:
    """Reorder *arr* along axis 0 by *perm* and negate components flagged in *flip_flags*.

    Works for any array rank: signs broadcast over all trailing axes. A None or
    empty *flip_flags* means no sign changes. Always returns a copy.
    """
    aligned = np.asarray(arr)[perm].copy()
    if flip_flags:
        signs = np.where(np.array(flip_flags, dtype=bool), -1.0, 1.0)
        # Use a non-in-place multiply: in-place `*=` with float signs raises a
        # casting error when *arr* has an integer dtype.
        aligned = aligned * signs.reshape((len(signs),) + (1,) * (aligned.ndim - 1))
    return aligned


def _self_test_match_and_orient_impl() -> None:
    """Smoke-test component matching: recover a known permutation plus sign flips."""
    rng = np.random.default_rng(0)
    n_comp, n_vars = 3, 7
    # Orthonormal reference rows via QR of a random matrix.
    basis, _ = np.linalg.qr(rng.normal(size=(n_comp, n_vars)).T)
    reference = basis.T

    # Scramble the reference with a fixed permutation and random signs.
    shuffled = rng.choice([-1.0, 1.0], size=n_comp)[:, None] * reference[np.array([2, 0, 1])]
    recovered, _, _, _ = _match_and_orient_impl(shuffled, reference)
    for row, ref in zip(recovered, reference):
        similarity = float(np.dot(row, ref))
        if similarity <= 0:
            raise AssertionError("Aligned component has non-positive similarity")
        if not np.isclose(abs(similarity), 1.0, atol=1e-10):
            raise AssertionError("Aligned component similarity not close to 1")

    # A pure sign flip (no permutation) must also be undone.
    flipped = reference.copy()
    flipped[1] *= -1
    recovered, _, _, _ = _match_and_orient_impl(flipped, reference)
    if not np.all(np.einsum("ij,ij->i", recovered, reference) > 0):
        raise AssertionError("Sign flips were not corrected")


# Optional smoke test: run this file directly with RV_SELF_TEST=1 to validate the
# component matcher, then exit before any data work happens.
if __name__ == "__main__" and os.environ.get("RV_SELF_TEST") == "1":
    _self_test_match_and_orient_impl()
    raise SystemExit(0)


def match_and_orient_loadings(
    loadings_now: np.ndarray, loadings_prev: np.ndarray | None
) -> np.ndarray:
    """Convenience wrapper: return only the matched, sign-oriented loadings."""
    aligned_loadings = _match_and_orient_impl(loadings_now, loadings_prev)[0]
    return aligned_loadings


def match_and_orient_diagnostics(
    loadings_now: np.ndarray, loadings_prev: np.ndarray | None
) -> tuple[str, list[bool], list[float]]:
    """Convenience wrapper: return only (perm string, flip flags, similarities)."""
    _aligned, perm_used, flip_flags, sims = _match_and_orient_impl(loadings_now, loadings_prev)
    return perm_used, flip_flags, sims
Show code
# 2.2 Load wide dataset and normalize index
# Normalize dates to guard against mixed date columns, integer day counts, and timezones that can break joins.
if not Path(input_path).exists():
    raise FileNotFoundError(f"Missing input parquet: {input_path}")

df = pd.read_parquet(input_path)

# If dates arrived as a column rather than the index, promote them to the index.
if "date" in df.columns:
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.set_index("date")

# Keep the pre-normalization index for the day-offset fallback parse below.
raw_index = df.index
df.index = ensure_naive_dates(df.index)
assert isinstance(df.index, pd.DatetimeIndex)

# Drop rows whose dates failed to parse, then enforce chronological order.
df = df.loc[~df.index.isna()].sort_index()

if len(df.index):
    max_year = int(df.index.max().year)
    if max_year < 1980 and pd.api.types.is_numeric_dtype(raw_index):
        # Some files encode dates as day offsets; try a unit-based parse if the year range looks wrong.
        alt_index = pd.to_datetime(raw_index, errors="coerce", unit="D")
        if isinstance(alt_index, pd.DatetimeIndex) and len(alt_index):
            alt_index = alt_index.tz_localize(None) if getattr(alt_index, "tz", None) else alt_index
            # NOTE(review): dropping NaT here can leave alt_index shorter than df's rows,
            # making the assignment below raise a length mismatch — confirm partial
            # parse failures cannot occur in the upstream data.
            alt_index = alt_index[~alt_index.isna()]
            if len(alt_index) and int(alt_index.max().year) >= 1980:
                df = df.copy()
                df.index = alt_index
                df = df.sort_index()

duplicate_dates_count = int(df.index.duplicated().sum())
# Duplicate dates would double-count observations and corrupt any time-series alignment or backtest calendar.
# NOTE: asserts are stripped under `python -O`; acceptable for an audit notebook.
assert df.index.is_monotonic_increasing
assert df.index.has_duplicates is False

# Overall missing percent is the share of NaNs across the full panel; high values can invalidate a daily signal.
overall_missing_percent = float(df.isna().mean().mean() * 100.0)

start_ts = df.index.min()
end_ts = df.index.max()

# One-row summary: panel shape, date span, duplicate count, and overall missingness.
ds_summary = pd.DataFrame(
    [
        {
            "rows": int(len(df)),
            "cols": int(df.shape[1]),
            "start_date": None if start_ts is pd.NaT else pd.Timestamp(start_ts).to_pydatetime(warn=False),
            "end_date": None if end_ts is pd.NaT else pd.Timestamp(end_ts).to_pydatetime(warn=False),
            "duplicates_count": duplicate_dates_count,
            "overall_missing_percent": overall_missing_percent,
        }
    ]
)

show_table(ds_summary)
+--------+--------+---------------------+---------------------+--------------------+---------------------------+
|   rows |   cols | start_date          | end_date            |   duplicates_count |   overall_missing_percent |
|--------+--------+---------------------+---------------------+--------------------+---------------------------|
|  16709 |     29 | 1962-01-02 00:00:00 | 2026-01-16 00:00:00 |                  0 |                   47.4019 |
+--------+--------+---------------------+---------------------+--------------------+---------------------------+
Show code
# 2.3 Column inventory and group summary
# Grouping distinguishes curve points (tenor yields) from macro series; curve points are not tradeable instruments.
macro_fields = {"rrp", "fed_assets", "tga", "eurofx"}

# Build one audit row per column: group label, dtype, observed date span, and missingness.
rows = []
for column_name in df.columns:
    s = df[column_name]

    if str(column_name).startswith("DGS"):
        group = "fred_dgs"
    elif str(column_name).lower() in macro_fields:
        group = "macro"
    elif is_treasury_par_curve_tenor(str(column_name)):
        # Treat par curve tenors as curve points used for statistical factor extraction, not tradeable assets.
        group = "treasury_par_curve"
    else:
        group = "other"

    obs_count = int(s.notna().sum())
    missing_count = int(s.isna().sum())
    rows.append(
        {
            "column_name": str(column_name),
            "group": group,
            "dtype": str(s.dtype),
            # first/last_valid_index give the observed span; interior gaps show up in missing_percent.
            "start_date": s.first_valid_index(),
            "end_date": s.last_valid_index(),
            "obs_count": obs_count,
            "missing_percent": float(missing_count / len(df) * 100.0) if len(df) else np.nan,
        }
    )

col_audit = pd.DataFrame(rows)

col_audit_sorted = col_audit.sort_values(
    ["group", "start_date", "column_name"], na_position="last"
).reset_index(drop=True)

# Per-group medians of start/end dates and missingness.
# NOTE(review): median aggregation over datetime columns depends on pandas version support — confirm.
group_summary = (
    col_audit_sorted.groupby("group", as_index=False)
    .agg(
        column_count=("column_name", "size"),
        median_start_date=("start_date", "median"),
        median_end_date=("end_date", "median"),
        median_missing_percent=("missing_percent", "median"),
    )
    .sort_values("group")
    .reset_index(drop=True)
)

show_table(group_summary)

# Truncate large inventories to head/tail for readable console output.
if len(col_audit_sorted) <= 40:
    show_table(col_audit_sorted)
else:
    note = pd.DataFrame(
        [
            {
                "note": "Column inventory truncated for display",
                "total_rows": int(len(col_audit_sorted)),
                "shown_head": 20,
                "shown_tail": 20,
            }
        ]
    )
    show_table(note)
    show_table(pd.concat([col_audit_sorted.head(20), col_audit_sorted.tail(20)], ignore_index=True))

# Persist the full inventory for downstream notebooks.
col_audit_sorted.to_parquet(
    out_dir / "data_audit_column_inventory.parquet", engine="pyarrow", index=False
)
+--------------------+----------------+---------------------+---------------------+--------------------------+
| group              |   column_count | median_start_date   | median_end_date     |   median_missing_percent |
|--------------------+----------------+---------------------+---------------------+--------------------------|
| fred_dgs           |             11 | 1969-07-01 00:00:00 | 2026-01-15 00:00:00 |                  15.4707 |
| macro              |              4 | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |                  86.9352 |
| treasury_par_curve |             14 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |                  46.05   |
+--------------------+----------------+---------------------+---------------------+--------------------------+
+---------------+--------------------+---------+---------------------+---------------------+-------------+-------------------+
| column_name   | group              | dtype   | start_date          | end_date            |   obs_count |   missing_percent |
|---------------+--------------------+---------+---------------------+---------------------+-------------+-------------------|
| DGS1          | fred_dgs           | float64 | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |       15994 |           4.27913 |
| DGS10         | fred_dgs           | float64 | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |       15994 |           4.27913 |
| DGS20         | fred_dgs           | float64 | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |       14305 |          14.3875  |
| DGS3          | fred_dgs           | float64 | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |       15994 |           4.27913 |
| DGS5          | fred_dgs           | float64 | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |       15994 |           4.27913 |
| DGS7          | fred_dgs           | float64 | 1969-07-01 00:00:00 | 2026-01-15 00:00:00 |       14124 |          15.4707  |
| DGS2          | fred_dgs           | float64 | 1976-06-01 00:00:00 | 2026-01-15 00:00:00 |       12402 |          25.7765  |
| DGS30         | fred_dgs           | float64 | 1977-02-15 00:00:00 | 2026-01-15 00:00:00 |       12224 |          26.8418  |
| DGS3MO        | fred_dgs           | float64 | 1981-09-01 00:00:00 | 2026-01-15 00:00:00 |       11092 |          33.6166  |
| DGS6MO        | fred_dgs           | float64 | 1981-09-01 00:00:00 | 2026-01-15 00:00:00 |       11092 |          33.6166  |
| DGS1MO        | fred_dgs           | float64 | 2001-07-31 00:00:00 | 2026-01-15 00:00:00 |        6116 |          63.397   |
| eurofx        | macro              | float64 | 1999-01-04 00:00:00 | 2026-01-09 00:00:00 |        6776 |          59.447   |
| fed_assets    | macro              | float64 | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |        1205 |          92.7883  |
| tga           | macro              | float64 | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |        1205 |          92.7883  |
| rrp           | macro              | float64 | 2003-02-07 00:00:00 | 2026-01-16 00:00:00 |        3161 |          81.0821  |
| 10_yr         | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 1_yr          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 2_yr          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 30_yr         | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        8022 |          51.9899  |
| 3_mo          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9013 |          46.059   |
| 3_yr          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 5_yr          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 6_mo          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 7_yr          | treasury_par_curve | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |          46.0411  |
| 20_yr         | treasury_par_curve | float64 | 1993-10-01 00:00:00 | 2026-01-16 00:00:00 |        8077 |          51.6608  |
| 1_mo          | treasury_par_curve | float64 | 2001-07-31 00:00:00 | 2026-01-16 00:00:00 |        6117 |          63.391   |
| 2_mo          | treasury_par_curve | float64 | 2018-10-16 00:00:00 | 2026-01-16 00:00:00 |        1812 |          89.1555  |
| 4_mo          | treasury_par_curve | float64 | 2022-10-19 00:00:00 | 2026-01-16 00:00:00 |         810 |          95.1523  |
| 15_month      | treasury_par_curve | float64 | 2025-02-18 00:00:00 | 2026-01-16 00:00:00 |         229 |          98.6295  |
+---------------+--------------------+---------+---------------------+---------------------+-------------+-------------------+
Show code
# 2.4 Availability snapshot by era
# Era coverage checks whether a long-history signal is feasible given data availability by regime.
# "Core" lists name the minimum columns a strategy in each group would need simultaneously.
core_curve = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"]
core_fred = ["DGS2", "DGS5", "DGS10", "DGS30"]
core_macro = ["rrp", "fed_assets", "tga", "eurofx"]

# Inclusive date windows; None marks the open-ended post-2020 bucket.
era_buckets = {
    "pre_2008": (pd.Timestamp("1990-01-02"), pd.Timestamp("2007-12-31")),
    "post_2008": (pd.Timestamp("2008-01-01"), pd.Timestamp("2019-12-31")),
    "post_2020": (pd.Timestamp("2020-01-01"), None),
}

# Keep only core columns actually present so absent columns cannot raise on selection.
group_core_map = {
    "treasury_par_curve": [c for c in core_curve if c in df.columns],
    "fred_dgs": [c for c in core_fred if c in df.columns],
    "macro": [c for c in core_macro if c in df.columns],
}

groups = ["treasury_par_curve", "fred_dgs", "macro"]
rows = []
for group in groups:
    group_cols = col_audit_sorted.loc[col_audit_sorted["group"] == group, "column_name"].tolist()
    core_cols = group_core_map.get(group, [])

    for era, (start, end) in era_buckets.items():
        if end is None:
            mask = df.index >= start
        else:
            mask = (df.index >= start) & (df.index <= end)

        n_days_in_bucket = int(mask.sum())
        if n_days_in_bucket == 0:
            # Emit a zero row so every (group, era) pair appears in the output.
            rows.append(
                {
                    "group": group,
                    "era": era,
                    "n_days_in_bucket": 0,
                    "any_non_null_days": 0,
                    "all_core_days": 0,
                    "coverage_any_pct": np.nan,
                    "coverage_all_core_pct": np.nan,
                }
)
            continue

        # Days on which at least one column in the group has data.
        if group_cols:
            any_non_null_days = int(df.loc[mask, group_cols].notna().any(axis=1).sum())
        else:
            any_non_null_days = 0

        # Days on which every core column has data simultaneously.
        if core_cols:
            all_core_days = int(df.loc[mask, core_cols].notna().all(axis=1).sum())
        else:
            all_core_days = 0

        rows.append(
            {
                "group": group,
                "era": era,
                "n_days_in_bucket": n_days_in_bucket,
                "any_non_null_days": any_non_null_days,
                "all_core_days": all_core_days,
                # NOTE(review): despite the "_pct" names these are fractions in [0, 1]
                # (see the rendered table) — confirm downstream consumers expect fractions.
                "coverage_any_pct": float(any_non_null_days / n_days_in_bucket),
                "coverage_all_core_pct": float(all_core_days / n_days_in_bucket) if core_cols else np.nan,
            }
)

era_coverage = pd.DataFrame(rows)
show_table(era_coverage)
+--------------------+-----------+--------------------+---------------------+-----------------+--------------------+-------------------------+
| group              | era       |   n_days_in_bucket |   any_non_null_days |   all_core_days |   coverage_any_pct |   coverage_all_core_pct |
|--------------------+-----------+--------------------+---------------------+-----------------+--------------------+-------------------------|
| treasury_par_curve | pre_2008  |               4695 |                4503 |            4503 |           0.959105 |              0.959105   |
| treasury_par_curve | post_2008 |               3131 |                3002 |            2999 |           0.958799 |              0.957841   |
| treasury_par_curve | post_2020 |               1578 |                1511 |            1511 |           0.957541 |              0.957541   |
| fred_dgs           | pre_2008  |               4695 |                4503 |            4503 |           0.959105 |              0.959105   |
| fred_dgs           | post_2008 |               3131 |                3002 |            3002 |           0.958799 |              0.958799   |
| fred_dgs           | post_2020 |               1578 |                1510 |            1510 |           0.956907 |              0.956907   |
| macro              | pre_2008  |               4695 |                2268 |               5 |           0.483067 |              0.00106496 |
| macro              | post_2008 |               3131 |                3020 |             339 |           0.964548 |              0.108272   |
| macro              | post_2020 |               1578 |                1520 |             309 |           0.963245 |              0.195817   |
+--------------------+-----------+--------------------+---------------------+-----------------+--------------------+-------------------------+
Show code
# 2.5 Frequency diagnostics (macro and non-daily)
# Detect non-daily series; weekly or irregular data can break daily trading logic and create lookahead risk.
rows = []

for column_name in df.columns:
    s = df[column_name]
    obs_idx = df.index[s.notna()]
    if len(obs_idx) < 3:
        # Too few observations to estimate a cadence.
        median_gap_days = np.nan
        freq_label = "irregular"
    else:
        # Median day-gap between consecutive observations; robust to occasional holes.
        gaps = obs_idx.to_series().diff().dt.days.dropna()
        median_gap_days = float(gaps.median()) if len(gaps) else np.nan

        if median_gap_days <= 1.5:
            freq_label = "daily"
        elif 5.0 <= median_gap_days <= 9.0:
            freq_label = "weekly"
        else:
            # Covers gaps in (1.5, 5.0) and above 9.0 days.
            freq_label = "irregular"

    # Look up the column's group from the 2.3 inventory; fall back to "other".
    group = (
        col_audit_sorted.loc[col_audit_sorted["column_name"] == str(column_name), "group"]
        .iloc[0]
        if (col_audit_sorted["column_name"] == str(column_name)).any()
        else "other"
    )

    # Report every macro series plus any series that is not daily.
    if group == "macro" or freq_label != "daily":
        meta = col_audit_sorted.loc[col_audit_sorted["column_name"] == str(column_name)].iloc[0]
        rows.append(
            {
                "column_name": str(column_name),
                "group": group,
                "start_date": meta["start_date"],
                "end_date": meta["end_date"],
                "obs_count": int(meta["obs_count"]),
                "median_gap_days": median_gap_days,
                "freq_label": freq_label,
            }
)

freq_summary = pd.DataFrame(rows).sort_values(
    ["group", "freq_label", "median_gap_days", "column_name"], na_position="last"
)

show_table(freq_summary)
+---------------+---------+---------------------+---------------------+-------------+-------------------+--------------+
| column_name   | group   | start_date          | end_date            |   obs_count |   median_gap_days | freq_label   |
|---------------+---------+---------------------+---------------------+-------------+-------------------+--------------|
| eurofx        | macro   | 1999-01-04 00:00:00 | 2026-01-09 00:00:00 |        6776 |                 1 | daily        |
| rrp           | macro   | 2003-02-07 00:00:00 | 2026-01-16 00:00:00 |        3161 |                 1 | daily        |
| fed_assets    | macro   | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |        1205 |                 7 | weekly       |
| tga           | macro   | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |        1205 |                 7 | weekly       |
+---------------+---------+---------------------+---------------------+-------------+-------------------+--------------+
Show code
# 2.6 Instrument decision workbook
# Thresholds are a data viability screen, not a statement of tradeability or implementability.
min_start_date = pd.Timestamp("1995-01-01")
max_missing_percent = 5.0
max_median_gap_days = 2.0

# Series -> median observation gap from 2.5. Series absent there were labeled daily
# in 2.5, so defaulting the gap to 1.0 below is consistent with that labeling.
freq_map = (
    freq_summary.set_index("column_name")["median_gap_days"].astype("float64").to_dict()
    if len(freq_summary)
    else {}
)

rows = []
for _, row in col_audit_sorted.iterrows():
    column_name = str(row["column_name"])
    start_date = row["start_date"]
    end_date = row["end_date"]
    missing_percent = float(row["missing_percent"]) if pd.notna(row["missing_percent"]) else np.nan
    median_gap_days = float(freq_map.get(column_name, 1.0))

    # Collect the name of each failed screen for the human-readable reason string.
    reasons = []
    ok_freq = median_gap_days <= max_median_gap_days
    if not ok_freq:
        reasons.append("frequency")

    ok_history = bool(pd.notna(start_date) and pd.Timestamp(start_date) <= min_start_date)
    if not ok_history:
        reasons.append("history")

    ok_missing = bool(pd.notna(missing_percent) and missing_percent <= max_missing_percent)
    if not ok_missing:
        reasons.append("missingness")

    # Even if recommended, curve yields remain non-tradeable; this is purely statistical filtering.
    recommended = ok_freq and ok_history and ok_missing
    reason = "meets thresholds" if recommended else "fails " + ", ".join(reasons)

    rows.append(
        {
            "series_name": column_name,
            "group": row["group"],
            "start_date": start_date,
            "end_date": end_date,
            "missing_percent": missing_percent,
            "median_gap_days": median_gap_days,
            "recommended_for_backtest": "yes" if recommended else "no",
            "reason": reason,
        }
)

# Descending sort on "yes"/"no" puts recommended series first.
instrument_candidates = pd.DataFrame(rows).sort_values(
    ["recommended_for_backtest", "group", "series_name"], ascending=[False, True, True]
).reset_index(drop=True)

# Record the thresholds alongside the decisions for reproducibility.
decision_params = pd.DataFrame(
    [
        {"param": "min_start_date", "value": pd.Timestamp(min_start_date).date().isoformat()},
        {"param": "max_missing_percent", "value": float(max_missing_percent)},
        {"param": "max_median_gap_days", "value": float(max_median_gap_days)},
    ]
)

show_table(decision_params)
show_table(instrument_candidates)

instrument_candidates.to_parquet(
    out_dir / "instrument_candidates.parquet", engine="pyarrow", index=False
)
+---------------------+------------+
| param               | value      |
|---------------------+------------|
| min_start_date      | 1995-01-01 |
| max_missing_percent | 5.0        |
| max_median_gap_days | 2.0        |
+---------------------+------------+
+---------------+--------------------+---------------------+---------------------+-------------------+-------------------+----------------------------+---------------------------------------+
| series_name   | group              | start_date          | end_date            |   missing_percent |   median_gap_days | recommended_for_backtest   | reason                                |
|---------------+--------------------+---------------------+---------------------+-------------------+-------------------+----------------------------+---------------------------------------|
| DGS1          | fred_dgs           | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |           4.27913 |                 1 | yes                        | meets thresholds                      |
| DGS10         | fred_dgs           | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |           4.27913 |                 1 | yes                        | meets thresholds                      |
| DGS3          | fred_dgs           | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |           4.27913 |                 1 | yes                        | meets thresholds                      |
| DGS5          | fred_dgs           | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |           4.27913 |                 1 | yes                        | meets thresholds                      |
| DGS1MO        | fred_dgs           | 2001-07-31 00:00:00 | 2026-01-15 00:00:00 |          63.397   |                 1 | no                         | fails history, missingness            |
| DGS2          | fred_dgs           | 1976-06-01 00:00:00 | 2026-01-15 00:00:00 |          25.7765  |                 1 | no                         | fails missingness                     |
| DGS20         | fred_dgs           | 1962-01-02 00:00:00 | 2026-01-15 00:00:00 |          14.3875  |                 1 | no                         | fails missingness                     |
| DGS30         | fred_dgs           | 1977-02-15 00:00:00 | 2026-01-15 00:00:00 |          26.8418  |                 1 | no                         | fails missingness                     |
| DGS3MO        | fred_dgs           | 1981-09-01 00:00:00 | 2026-01-15 00:00:00 |          33.6166  |                 1 | no                         | fails missingness                     |
| DGS6MO        | fred_dgs           | 1981-09-01 00:00:00 | 2026-01-15 00:00:00 |          33.6166  |                 1 | no                         | fails missingness                     |
| DGS7          | fred_dgs           | 1969-07-01 00:00:00 | 2026-01-15 00:00:00 |          15.4707  |                 1 | no                         | fails missingness                     |
| eurofx        | macro              | 1999-01-04 00:00:00 | 2026-01-09 00:00:00 |          59.447   |                 1 | no                         | fails history, missingness            |
| fed_assets    | macro              | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |          92.7883  |                 7 | no                         | fails frequency, history, missingness |
| rrp           | macro              | 2003-02-07 00:00:00 | 2026-01-16 00:00:00 |          81.0821  |                 1 | no                         | fails history, missingness            |
| tga           | macro              | 2002-12-18 00:00:00 | 2026-01-14 00:00:00 |          92.7883  |                 7 | no                         | fails frequency, history, missingness |
| 10_yr         | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 15_month      | treasury_par_curve | 2025-02-18 00:00:00 | 2026-01-16 00:00:00 |          98.6295  |                 1 | no                         | fails history, missingness            |
| 1_mo          | treasury_par_curve | 2001-07-31 00:00:00 | 2026-01-16 00:00:00 |          63.391   |                 1 | no                         | fails history, missingness            |
| 1_yr          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 20_yr         | treasury_par_curve | 1993-10-01 00:00:00 | 2026-01-16 00:00:00 |          51.6608  |                 1 | no                         | fails missingness                     |
| 2_mo          | treasury_par_curve | 2018-10-16 00:00:00 | 2026-01-16 00:00:00 |          89.1555  |                 1 | no                         | fails history, missingness            |
| 2_yr          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 30_yr         | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          51.9899  |                 1 | no                         | fails missingness                     |
| 3_mo          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.059   |                 1 | no                         | fails missingness                     |
| 3_yr          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 4_mo          | treasury_par_curve | 2022-10-19 00:00:00 | 2026-01-16 00:00:00 |          95.1523  |                 1 | no                         | fails history, missingness            |
| 5_yr          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 6_mo          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
| 7_yr          | treasury_par_curve | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          46.0411  |                 1 | no                         | fails missingness                     |
+---------------+--------------------+---------------------+---------------------+-------------------+-------------------+----------------------------+---------------------------------------+

3. Data engineering pipeline and canonical curve panel

Show code
# 3.1 Imports and paths
# Build a canonical curve panel: consistent tenors on a shared observation calendar for PCA inputs (not tradable).

# All section-3 artifacts are written under data/derived; mkdir is idempotent on re-runs.
derived_dir = Path("data/derived")
derived_dir.mkdir(parents=True, exist_ok=True)
output_path = derived_dir / "curve_treasury_par_canonical.parquet"
Show code
# 3.2 Load raw Treasury par curve parquet
# Raw curve data can arrive with mixed date fields and timezones; normalize to a clean observation calendar.
raw_path = Path("data/single_assets/treasury_par_yield_curve.parquet")
if not raw_path.exists():
    raise FileNotFoundError(f"Missing raw treasury par curve parquet: {raw_path}")

df_raw = pd.read_parquet(raw_path)

# Prefer an explicit "date" column as the index; otherwise coerce whatever index exists.
if "date" in df_raw.columns:
    df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
    df_raw = df_raw.set_index("date")

if not isinstance(df_raw.index, pd.DatetimeIndex):
    df_raw.index = pd.to_datetime(df_raw.index, errors="coerce")
df_raw.index = ensure_naive_dates(df_raw.index)

# Drop rows whose dates failed to parse, then sort chronologically.
df_raw = df_raw.loc[~df_raw.index.isna()].sort_index()

duplicate_dates_count = int(df_raw.index.duplicated().sum())
# Duplicate curve dates would contaminate factor estimation and any backtest alignment.
assert df_raw.index.is_monotonic_increasing, "raw curve index must be sorted ascending"
# Use truthiness rather than `x is False`: an identity check can fail spuriously
# if the property ever returns a numpy bool instead of the Python singleton.
assert not df_raw.index.has_duplicates, (
    f"raw curve index has {duplicate_dates_count} duplicate dates"
)

overall_missing_percent = float(df_raw.isna().mean().mean() * 100.0)

summary = pd.DataFrame(
    [
        {
            "raw_path": str(raw_path),
            "rows": int(df_raw.shape[0]),
            "cols": int(df_raw.shape[1]),
            "start_date": pd.Timestamp(df_raw.index.min()).to_pydatetime(warn=False),
            "end_date": pd.Timestamp(df_raw.index.max()).to_pydatetime(warn=False),
            "duplicate_dates_count": duplicate_dates_count,
            "overall_missing_percent": overall_missing_percent,
        }
    ]
)
show_table(summary)
+-----------------------------------------------------+--------+--------+---------------------+---------------------+-------------------------+---------------------------+
| raw_path                                            |   rows |   cols | start_date          | end_date            |   duplicate_dates_count |   overall_missing_percent |
|-----------------------------------------------------+--------+--------+---------------------+---------------------+-------------------------+---------------------------|
| data/single_assets/treasury_par_yield_curve.parquet |   9017 |     14 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |                       0 |                   23.0089 |
+-----------------------------------------------------+--------+--------+---------------------+---------------------+-------------------------+---------------------------+
Show code
# 3.3 Standardize column names and select curve tenors
# Canonical tenor names ensure stable ordering and consistent PCA inputs across data vendors.

def canonical_tenor_name(name: str) -> str | None:
    s = str(name).strip().lower()
    for ch in ["-", "_"]:
        s = s.replace(ch, " ")
    s = " ".join(s.split())

    tokens = [t.strip().replace(".", "") for t in s.split() if t.strip()]

    num = None
    unit = None

    for t in tokens:
        if t.isdigit():
            num = int(t)
            continue

        digits = ""
        rest = ""
        for c in t:
            if c.isdigit() and rest == "":
                digits += c
            else:
                rest += c

        if digits and rest:
            if num is None:
                num = int(digits)
            t = rest

        if t in {"mo", "mon", "m"}:
            unit = "mo"
        if t in {"month", "months"}:
            unit = "mo"
        if t in {"yr", "yrs", "year", "years", "y"}:
            unit = "yr"

    if num is None or unit is None:
        return None
    return f"{num}_{unit}"


# Normalize whitespace and case first so the tenor parser sees consistent names.
base_names = ["_".join(str(col).strip().lower().split()) for col in df_raw.columns]
df_raw = df_raw.copy()
df_raw.columns = base_names

# Map every parseable column to its canonical tenor label; others keep their name.
rename_map = {
    col: canon
    for col in df_raw.columns
    if (canon := canonical_tenor_name(col)) is not None
}
df_raw = df_raw.rename(columns=rename_map)

# Two raw columns collapsing onto one tenor would silently merge series; fail loudly instead.
dup_mask = pd.Index(df_raw.columns).duplicated(keep=False)
if dup_mask.any():
    dup_cols = sorted(set(pd.Index(df_raw.columns)[dup_mask]))
    raise ValueError(f"Duplicate tenor columns after standardization: {dup_cols}")

# Canonical short-to-long maturity ordering; tenors not listed here sort to the end.
order = [
    "1_mo",
    "2_mo",
    "3_mo",
    "4_mo",
    "6_mo",
    "9_mo",
    "12_mo",
    "15_mo",
    "18_mo",
    "24_mo",
    "1_yr",
    "2_yr",
    "3_yr",
    "5_yr",
    "7_yr",
    "10_yr",
    "20_yr",
    "30_yr",
]
order_rank = {tenor: rank for rank, tenor in enumerate(order)}

# Only keep columns that can be mapped to a tenor; this trades coverage for consistency through time.
tenor_cols = [col for col in df_raw.columns if canonical_tenor_name(col) is not None]
if not tenor_cols:
    raise ValueError("No tenor columns detected in raw treasury par curve parquet")

tenor_cols.sort(key=lambda col: order_rank.get(col, 10_000))

# Coerce every tenor series to float64; unparseable entries become NaN.
df_tenors = df_raw[tenor_cols].copy()
for col in df_tenors.columns:
    df_tenors[col] = pd.to_numeric(df_tenors[col], errors="coerce").astype("float64")

# Per-column audit: dtype, first/last valid observation, and missing share.
audit_rows = [
    {
        "column_name": col,
        "dtype": str(df_tenors[col].dtype),
        "start_date": df_tenors[col].first_valid_index(),
        "end_date": df_tenors[col].last_valid_index(),
        "missing_percent": float(df_tenors[col].isna().mean() * 100.0),
    }
    for col in df_tenors.columns
]

col_summary = pd.DataFrame(audit_rows).sort_values(
    by="column_name", key=lambda s: s.map(lambda name: order_rank.get(name, 10_000))
)
show_table(col_summary)
+---------------+---------+---------------------+---------------------+-------------------+
| column_name   | dtype   | start_date          | end_date            |   missing_percent |
|---------------+---------+---------------------+---------------------+-------------------|
| 1_mo          | float64 | 2001-07-31 00:00:00 | 2026-01-16 00:00:00 |        32.1615    |
| 2_mo          | float64 | 2018-10-16 00:00:00 | 2026-01-16 00:00:00 |        79.9046    |
| 3_mo          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0443607 |
| 4_mo          | float64 | 2022-10-19 00:00:00 | 2026-01-16 00:00:00 |        91.017     |
| 6_mo          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 15_mo         | float64 | 2025-02-18 00:00:00 | 2026-01-16 00:00:00 |        97.4604    |
| 1_yr          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 2_yr          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 3_yr          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 5_yr          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 7_yr          | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 10_yr         | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         0.0110902 |
| 20_yr         | float64 | 1993-10-01 00:00:00 | 2026-01-16 00:00:00 |        10.4248    |
| 30_yr         | float64 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        11.0347    |
+---------------+---------+---------------------+---------------------+-------------------+
Show code
# 3.4 Create canonical observation index and reindex
# Canonical here means a shared trading calendar derived from observed curve dates (no fabricated gaps).
idx = df_tenors.index.sort_values().unique()

df_curve_canonical = df_tenors.reindex(idx)
df_curve_canonical.index.name = "date"

# Enforce canonical column order and float64 storage for the saved panel.
df_curve_canonical = df_curve_canonical[tenor_cols].copy()
for c in df_curve_canonical.columns:
    df_curve_canonical[c] = df_curve_canonical[c].astype("float64")

assert df_curve_canonical.index.is_monotonic_increasing, "canonical index must be sorted ascending"
# Truthiness check instead of `x is False`: an identity comparison can fail
# spuriously if the property ever returns a numpy bool.
assert not df_curve_canonical.index.has_duplicates, "canonical index must have unique dates"

# These tenors are required downstream (PCA inputs); abort early if any are absent.
core_required = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"]
missing_required = [c for c in core_required if c not in df_curve_canonical.columns]
if missing_required:
    raise AssertionError(f"Missing required core tenors: {missing_required}")
Show code
# 3.5 Save and reload verification
# Round-trip the canonical panel through parquet and confirm the reloaded copy
# matches what was written: shape, date range, column order, and dtypes.
df_curve_canonical.to_parquet(output_path, engine="pyarrow")
df_reload = pd.read_parquet(output_path)

# Parquet round-trips can reattach timezone info; normalize back to naive dates.
df_reload.index = ensure_naive_dates(df_reload.index)

df_reload = df_reload.sort_index()

# Each entry records expected vs observed plus a boolean verdict for the report table.
# Note: expected/observed are stringified for display, but pass_bool compares the
# underlying objects (tuples, Timestamps, column lists) directly.
checks = [
    {
        "check_name": "file_exists",
        "expected": True,
        "observed": output_path.exists(),
        "pass_bool": bool(output_path.exists()),
    },
    {
        "check_name": "shape",
        "expected": str(df_curve_canonical.shape),
        "observed": str(df_reload.shape),
        "pass_bool": bool(df_reload.shape == df_curve_canonical.shape),
    },
    {
        "check_name": "index_start",
        "expected": str(df_curve_canonical.index.min().date()),
        "observed": str(df_reload.index.min().date()),
        "pass_bool": bool(df_reload.index.min() == df_curve_canonical.index.min()),
    },
    {
        "check_name": "index_end",
        "expected": str(df_curve_canonical.index.max().date()),
        "observed": str(df_reload.index.max().date()),
        "pass_bool": bool(df_reload.index.max() == df_curve_canonical.index.max()),
    },
    {
        "check_name": "columns",
        "expected": ",".join(list(df_curve_canonical.columns)),
        "observed": ",".join(list(df_reload.columns)),
        "pass_bool": bool(list(df_reload.columns) == list(df_curve_canonical.columns)),
    },
    {
        "check_name": "float64_dtypes",
        "expected": True,
        "observed": bool((df_reload.dtypes == "float64").all()),
        "pass_bool": bool((df_reload.dtypes == "float64").all()),
    },
]

show_table(pd.DataFrame(checks))
+----------------+---------------------------------------------------------------------------+---------------------------------------------------------------------------+-------------+
| check_name     | expected                                                                  | observed                                                                  | pass_bool   |
|----------------+---------------------------------------------------------------------------+---------------------------------------------------------------------------+-------------|
| file_exists    | True                                                                      | True                                                                      | True        |
| shape          | (9017, 14)                                                                | (9017, 14)                                                                | True        |
| index_start    | 1990-01-02                                                                | 1990-01-02                                                                | True        |
| index_end      | 2026-01-16                                                                | 2026-01-16                                                                | True        |
| columns        | 1_mo,2_mo,3_mo,4_mo,6_mo,15_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr,30_yr | 1_mo,2_mo,3_mo,4_mo,6_mo,15_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr,30_yr | True        |
| float64_dtypes | True                                                                      | True                                                                      | True        |
+----------------+---------------------------------------------------------------------------+---------------------------------------------------------------------------+-------------+
Show code
# 3.6 Validation report
df_curve = df_curve_canonical

# Core tenor set used for coverage statistics (includes 30y, unlike the PCA set).
core_tenors = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr", "30_yr"]
core_tenors_present = [t for t in core_tenors if t in df_curve.columns]
core_tenors_missing = [t for t in core_tenors if t not in df_curve.columns]

core_def = pd.DataFrame(
    [
        {
            "core_tenors_used": ",".join(core_tenors_present),
            "core_tenors_missing": ",".join(core_tenors_missing),
            "n_core_tenors_used": len(core_tenors_present),
        }
    ]
)
show_table(core_def)

if not core_tenors_present:
    raise ValueError("No core tenors present in df_curve")

# Panel-level coverage: overall missing share, days with any data, days fully covered on core tenors.
percent_missing_overall = float(df_curve.isna().mean().mean() * 100.0)

count_days_with_any_data = int(df_curve.notna().any(axis=1).sum())
count_days_with_all_core_tenors = int(
    df_curve[core_tenors_present].notna().all(axis=1).sum()
)

panel_summary = pd.DataFrame(
    [
        {
            "rows": int(df_curve.shape[0]),
            "columns": int(df_curve.shape[1]),
            "start_date": pd.Timestamp(df_curve.index.min()).to_pydatetime(warn=False),
            "end_date": pd.Timestamp(df_curve.index.max()).to_pydatetime(warn=False),
            "percent_missing_overall": percent_missing_overall,
            "n_core_tenors_used": len(core_tenors_present),
            "count_days_with_any_data": count_days_with_any_data,
            "count_days_with_all_core_tenors": count_days_with_all_core_tenors,
        }
    ]
)

show_table(panel_summary)

# Per-tenor manifest, ordered short to long maturity via order_rank.
rows = [
    {
        "tenor": tenor,
        "start_date": df_curve[tenor].first_valid_index(),
        "end_date": df_curve[tenor].last_valid_index(),
        "obs_count": int(df_curve[tenor].notna().sum()),
        "missing_percent": float(df_curve[tenor].isna().mean() * 100.0),
    }
    for tenor in df_curve.columns
]

tenor_summary = (
    pd.DataFrame(rows)
    .sort_values(by="tenor", key=lambda s: s.map(lambda x: order_rank.get(x, 10_000)))
    .reset_index(drop=True)
)

show_table(tenor_summary)

tenor_summary.to_parquet(
    derived_dir / "curve_treasury_par_canonical_manifest.parquet", engine="pyarrow", index=False
)
+------------------------------------------------+-----------------------+----------------------+
| core_tenors_used                               | core_tenors_missing   |   n_core_tenors_used |
|------------------------------------------------+-----------------------+----------------------|
| 3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,30_yr |                       |                    9 |
+------------------------------------------------+-----------------------+----------------------+
+--------+-----------+---------------------+---------------------+---------------------------+----------------------+----------------------------+-----------------------------------+
|   rows |   columns | start_date          | end_date            |   percent_missing_overall |   n_core_tenors_used |   count_days_with_any_data |   count_days_with_all_core_tenors |
|--------+-----------+---------------------+---------------------+---------------------------+----------------------+----------------------------+-----------------------------------|
|   9017 |        14 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |                   23.0089 |                    9 |                       9016 |                              8019 |
+--------+-----------+---------------------+---------------------+---------------------------+----------------------+----------------------------+-----------------------------------+
+---------+---------------------+---------------------+-------------+-------------------+
| tenor   | start_date          | end_date            |   obs_count |   missing_percent |
|---------+---------------------+---------------------+-------------+-------------------|
| 1_mo    | 2001-07-31 00:00:00 | 2026-01-16 00:00:00 |        6117 |        32.1615    |
| 2_mo    | 2018-10-16 00:00:00 | 2026-01-16 00:00:00 |        1812 |        79.9046    |
| 3_mo    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9013 |         0.0443607 |
| 4_mo    | 2022-10-19 00:00:00 | 2026-01-16 00:00:00 |         810 |        91.017     |
| 6_mo    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 15_mo   | 2025-02-18 00:00:00 | 2026-01-16 00:00:00 |         229 |        97.4604    |
| 1_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 2_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 3_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 5_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 7_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 10_yr   | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        9016 |         0.0110902 |
| 20_yr   | 1993-10-01 00:00:00 | 2026-01-16 00:00:00 |        8077 |        10.4248    |
| 30_yr   | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |        8022 |        11.0347    |
+---------+---------------------+---------------------+-------------+-------------------+
Show code
# 3.7 Canonical curve era coverage (core tenors)
core_tenors_era = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"]
core_tenors_present = [t for t in core_tenors_era if t in df_curve_canonical.columns]
if not core_tenors_present:
    raise ValueError("No core tenors present in canonical curve panel")

# Era buckets: a None right edge means "through the end of the sample".
era_buckets = {
    "pre_2008": (pd.Timestamp("1990-01-02"), pd.Timestamp("2007-12-31")),
    "post_2008": (pd.Timestamp("2008-01-01"), pd.Timestamp("2019-12-31")),
    "post_2020": (pd.Timestamp("2020-01-01"), None),
}

rows = []
dates = df_curve_canonical.index
for era_name, (era_start, era_end) in era_buckets.items():
    if era_end is None:
        mask = dates >= era_start
    else:
        mask = (dates >= era_start) & (dates <= era_end)

    n_days_in_bucket = int(mask.sum())
    if n_days_in_bucket == 0:
        rows.append(
            {
                "era": era_name,
                "n_days_in_bucket": 0,
                "all_core_days": 0,
                "coverage_all_core_pct": np.nan,
            }
        )
        continue

    # Days in the bucket where every core tenor has an observation.
    all_core_days = int(
        df_curve_canonical.loc[mask, core_tenors_present].notna().all(axis=1).sum()
    )
    rows.append(
        {
            "era": era_name,
            "n_days_in_bucket": n_days_in_bucket,
            "all_core_days": all_core_days,
            # NOTE(review): despite the "_pct" suffix this is a fraction in [0, 1]
            # (see existing report output) — kept as-is for continuity.
            "coverage_all_core_pct": float(all_core_days / n_days_in_bucket),
        }
    )

era_core_coverage = pd.DataFrame(rows)
show_table(era_core_coverage)
+-----------+--------------------+-----------------+-------------------------+
| era       |   n_days_in_bucket |   all_core_days |   coverage_all_core_pct |
|-----------+--------------------+-----------------+-------------------------|
| pre_2008  |               4503 |            4503 |                1        |
| post_2008 |               3003 |            2999 |                0.998668 |
| post_2020 |               1511 |            1511 |                1        |
+-----------+--------------------+-----------------+-------------------------+

3.8 PCA structure diagnostics on yield changes

This section documents how the PCA structure evolves through time on the canonical curve panel. It reports explained variance ratios and loading shapes across tenors using yield changes in basis points.

Show code
section3_dir = Path("outputs/section_03")
section3_dir.mkdir(parents=True, exist_ok=True)

# Tenors used as PCA inputs; fail fast if the canonical panel lacks any of them.
pca_tenors = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"]
missing_pca_tenors = [t for t in pca_tenors if t not in df_curve_canonical.columns]
if missing_pca_tenors:
    raise ValueError(f"Missing tenors for PCA structure diagnostics: {missing_pca_tenors}")

df_y = df_curve_canonical[pca_tenors].copy()

# Daily yield changes in basis points; drop any day with a missing tenor.
dy_bp = (df_y.diff() * 100.0).dropna(axis=0, how="any")

# Rolling window of ~5 years of business days, re-estimated every 21 days.
window_years = 5
window_days = 252 * window_years
step_days = 21

if dy_bp.shape[0] < window_days + 10:
    raise ValueError("Not enough history to compute rolling PCA structure diagnostics.")

sample_dates = dy_bp.index[window_days::step_days]

rows = []
for end_date in sample_dates:
    window_df = dy_bp.loc[:end_date].tail(window_days)
    if window_df.shape[0] < window_days:
        continue  # skip partially filled windows at the edges
    vals = window_df.values
    centered = vals - vals.mean(axis=0, keepdims=True)
    _loadings, _scores, evr = pca_svd(centered, k=8)
    rows.append(
        {"date": end_date, **{f"evr_pc{k}": float(evr[k - 1]) for k in range(1, 9)}}
    )

df_evr_roll = pd.DataFrame(rows).set_index("date").sort_index()
df_evr_roll.to_csv(section3_dir / "pca_evr_rolling_5y.csv")

# Plot the first five components' explained variance through time.
fig, ax = plt.subplots()
for k in range(1, 6):
    ax.plot(df_evr_roll.index, df_evr_roll[f"evr_pc{k}"], label=f"PC{k}")
ax.set_title("Rolling 5 year explained variance ratio on yield changes")
ax.set_xlabel("date")
ax.set_ylabel("explained variance ratio")
ax.legend()
fig.savefig(section3_dir / "pca_evr_rolling_5y.png", dpi=150, bbox_inches="tight")
plt.show()

# Fixed snapshot dates for comparing loading shapes across decades.
snapshot_targets = [
    ("1995", pd.Timestamp("1995-01-03")),
    ("2005", pd.Timestamp("2005-01-03")),
    ("2015", pd.Timestamp("2015-01-02")),
    ("2025", pd.Timestamp("2025-01-02")),
]

def nearest_index_date(idx: pd.DatetimeIndex, target: pd.Timestamp) -> pd.Timestamp:
    """Return the index date closest to *target*.

    Out-of-range targets clamp to the first/last index date. When *target*
    is equidistant from its two neighbors, the earlier date wins.
    """
    insert_at = int(idx.searchsorted(target))
    if insert_at <= 0:
        return pd.Timestamp(idx[0])
    if insert_at >= len(idx):
        return pd.Timestamp(idx[-1])
    before = pd.Timestamp(idx[insert_at - 1])
    after = pd.Timestamp(idx[insert_at])
    gap_after = abs((after - target).days)
    gap_before = abs((target - before).days)
    return after if gap_after < gap_before else before

snap_rows = []
snap_loadings_long = []

# Numeric maturities (years) parallel to pca_tenors, for the long-format output.
tenor_years = [tenor_to_years(t) for t in pca_tenors]

for label, target in snapshot_targets:
    snap_end = nearest_index_date(dy_bp.index, target)
    # Trailing 5y window ending at the snapshot date; drop snapshots without full
    # history (an early target can fall before the first complete window).
    w = dy_bp.loc[:snap_end].tail(window_days)
    if w.shape[0] < window_days:
        continue
    X = w.values
    Xc = X - X.mean(axis=0, keepdims=True)
    loadings, scores, evr = pca_svd(Xc, k=5)
    # Fix the sign convention of the first three PCs so loading shapes are
    # comparable across snapshots (align_signs is defined earlier in the file).
    loadings_3 = align_signs(loadings[:3, :], pca_tenors)

    snap_row = {"snapshot": label, "window_end": snap_end}
    for k in range(1, 6):
        snap_row[f"evr_pc{k}"] = float(evr[k - 1])
    snap_rows.append(snap_row)

    # Long format: one row per (snapshot, PC, tenor) for tidy CSV export and plotting.
    for pc_i in range(3):
        for j, tenor in enumerate(pca_tenors):
            snap_loadings_long.append(
                {
                    "snapshot": label,
                    "window_end": snap_end,
                    "pc": f"PC{pc_i + 1}",
                    "tenor": tenor,
                    "tenor_years": float(tenor_years[j]),
                    "loading": float(loadings_3[pc_i, j]),
                }
            )

df_snap_evr = pd.DataFrame(snap_rows)
df_snap_evr.to_csv(section3_dir / "pca_snapshots_explained_variance.csv", index=False)

df_snap_loadings = pd.DataFrame(snap_loadings_long)
df_snap_loadings.to_csv(section3_dir / "pca_snapshots_loadings.csv", index=False)

show_table(df_snap_evr)

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 10), sharex=True)

# Plot the three most recent snapshots (sorted by label), one panel per snapshot.
snapshots = sorted(df_snap_loadings["snapshot"].unique())[-3:]
tenor_labels = pca_tenors
x = np.arange(len(tenor_labels), dtype=float)

pc_labels = ["PC1", "PC2", "PC3"]
bar_width = 0.25

for ax, snap in zip(axes, snapshots):
    df_snap = df_snap_loadings[df_snap_loadings["snapshot"] == snap].copy()
    # Ordered categoricals pin tenor/PC ordering so grouped bars line up across panels.
    df_snap["tenor"] = pd.Categorical(df_snap["tenor"], categories=tenor_labels, ordered=True)
    df_snap["pc"] = pd.Categorical(df_snap["pc"], categories=pc_labels, ordered=True)
    df_snap = df_snap.sort_values(["tenor", "pc"])

    for j, pc in enumerate(pc_labels):
        d = df_snap[df_snap["pc"] == pc].set_index("tenor").reindex(tenor_labels)
        y = d["loading"].astype("float64").values
        # Offset each PC's bars around the tenor tick: (j - 1) yields -1, 0, +1 bar widths.
        ax.bar(x + (j - 1) * bar_width, y, width=bar_width, label=pc, alpha=0.9)

    ax.set_title(f"{snap} snapshot: PC loadings across tenors")
    ax.set_ylabel("loading")
    ax.axhline(0.0, linewidth=1, alpha=0.4)
    ax.legend(title="pc", ncol=3)

axes[-1].set_xlabel("tenor")
axes[-1].set_xticks(x)
axes[-1].set_xticklabels(tenor_labels, rotation=0)

fig.tight_layout()
fig.savefig(section3_dir / "pca_loadings_snapshots.png", dpi=150, bbox_inches="tight")
plt.show()

+------------+---------------------+-----------+-----------+-----------+-----------+------------+
|   snapshot | window_end          |   evr_pc1 |   evr_pc2 |   evr_pc3 |   evr_pc4 |    evr_pc5 |
|------------+---------------------+-----------+-----------+-----------+-----------+------------|
|       2005 | 2005-01-03 00:00:00 |  0.829606 | 0.105123  | 0.0337184 | 0.0142813 | 0.00664225 |
|       2015 | 2015-01-02 00:00:00 |  0.894341 | 0.0564883 | 0.0162634 | 0.0114539 | 0.00690477 |
|       2025 | 2025-01-02 00:00:00 |  0.825211 | 0.0964274 | 0.0439861 | 0.0159597 | 0.0086112  |
+------------+---------------------+-----------+-----------+-----------+-----------+------------+

2.X Availability notes for downstream backtest design

Show code
# 2.X.1 Define era buckets and core candidate universes
# Era boundaries split the sample into pre-2008, 2008-2019, and 2020-onward regimes.
_era_specs = [
    ("pre_2008", pd.Timestamp("1990-01-02"), pd.Timestamp("2007-12-31")),
    ("post_2008", pd.Timestamp("2008-01-01"), pd.Timestamp("2019-12-31")),
    ("post_2020", pd.Timestamp("2020-01-01"), df.index.max()),
]
eras = pd.DataFrame(
    [{"name": n, "start_date": s, "end_date": e} for n, s, e in _era_specs]
)

# Candidate tenor universes for downstream backtest design.
candidate_universes = {
    'U_core_8': ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"],
    'U_core_9': ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr", "30_yr"],
    'U_core_10': ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr", "20_yr", "30_yr"],
    'U_short_end': ["1_mo", "3_mo", "6_mo", "1_yr", "2_yr"],
}

# Report each universe restricted to tenors that actually exist in the wide dataset.
un_table = pd.DataFrame(
    [
        {
            'universe': name,
            'members': ','.join(m for m in members if m in df.columns),
        }
        for name, members in candidate_universes.items()
    ]
)
show_table(un_table)
+-------------+------------------------------------------------------+
| universe    | members                                              |
|-------------+------------------------------------------------------|
| U_core_8    | 3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr             |
| U_core_9    | 3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,30_yr       |
| U_core_10   | 3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr,30_yr |
| U_short_end | 1_mo,3_mo,6_mo,1_yr,2_yr                             |
+-------------+------------------------------------------------------+
Show code
# 2.X.2 Era coverage for wide dataset groups
# For each (dataset group, era) pair, count index days and the days on which at
# least one column of the group is non-null.
rows = []
groups = ['treasury_par_curve','fred_dgs','macro']
for group in groups:
    # col_audit_sorted maps each wide column to its dataset group (built earlier in section 2).
    group_cols = col_audit_sorted.loc[col_audit_sorted['group'] == group, 'column_name'].tolist()
    for _, r in eras.iterrows():
        start = r['start_date']
        end = r['end_date']
        if pd.isna(end):
            # Open-ended era: no upper bound on dates.
            mask = df.index >= start
        else:
            mask = (df.index >= start) & (df.index <= end)
        n_days = int(mask.sum())
        if n_days == 0:
            # Era falls outside the index entirely; emit an explicit zero-coverage row.
            rows.append({'era': r['name'], 'group': group, 'days_in_index':0, 'any_non_null_days':0, 'any_non_null_pct': np.nan})
            continue
        if group_cols:
            # A day counts as covered when any column of the group has data.
            any_non_null_days = int(df.loc[mask, group_cols].notna().any(axis=1).sum())
            any_non_null_pct = float(any_non_null_days / n_days)
        else:
            any_non_null_days = 0
            any_non_null_pct = np.nan
        rows.append({'era': r['name'], 'group': group, 'days_in_index': n_days, 'any_non_null_days': any_non_null_days, 'any_non_null_pct': any_non_null_pct})
era_group_summary = pd.DataFrame(rows)
show_table(era_group_summary)
era_group_summary.to_parquet(out_dir / 'availability_era_group_summary.parquet', engine='pyarrow', index=False)
+-----------+--------------------+-----------------+---------------------+--------------------+
| era       | group              |   days_in_index |   any_non_null_days |   any_non_null_pct |
|-----------+--------------------+-----------------+---------------------+--------------------|
| pre_2008  | treasury_par_curve |            4695 |                4503 |           0.959105 |
| post_2008 | treasury_par_curve |            3131 |                3002 |           0.958799 |
| post_2020 | treasury_par_curve |            1578 |                1511 |           0.957541 |
| pre_2008  | fred_dgs           |            4695 |                4503 |           0.959105 |
| post_2008 | fred_dgs           |            3131 |                3002 |           0.958799 |
| post_2020 | fred_dgs           |            1578 |                1510 |           0.956907 |
| pre_2008  | macro              |            4695 |                2268 |           0.483067 |
| post_2008 | macro              |            3131 |                3020 |           0.964548 |
| post_2020 | macro              |            1578 |                1520 |           0.963245 |
+-----------+--------------------+-----------------+---------------------+--------------------+

3.X Canonical curve availability diagnostics

Show code
# 3.X.1 Load canonical curve panel and locate the raw curve dates used to build it
# Loads the canonical treasury par curve and, when an earlier cell has not already
# defined df_raw, discovers and loads the raw panel from disk so the two calendars
# can be compared. (Fix: removed the unused `import glob` -- discovery uses
# Path.rglob -- matching the parallel loader in section 4.3.)
curve_path = derived_dir / 'curve_treasury_par_canonical.parquet'
if not curve_path.exists():
    raise FileNotFoundError(curve_path)
df_curve = pd.read_parquet(curve_path)
if 'date' in df_curve.columns:
    df_curve['date'] = pd.to_datetime(df_curve['date'], errors='coerce')
    df_curve = df_curve.set_index('date')
df_curve.index = ensure_naive_dates(df_curve.index)
df_curve = df_curve.sort_index()
# Reuse df_raw if an earlier cell already defined it, else locate a raw parquet under data/.
try:
    df_raw
    _ = df_raw
except NameError:
    candidates = list(Path('data').rglob('*treasury*par*.parquet'))
    if not candidates:
        raise FileNotFoundError('raw treasury par curve parquet not found')
    # NOTE(review): if several files match, the first rglob hit is used and the
    # order is filesystem-dependent -- confirm only one raw parquet exists.
    df_raw = pd.read_parquet(candidates[0])
    if 'date' in df_raw.columns:
        df_raw['date'] = pd.to_datetime(df_raw['date'], errors='coerce')
        df_raw = df_raw.set_index('date')
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()
one_row = pd.DataFrame([{'raw_start': df_raw.index.min(), 'raw_end': df_raw.index.max(), 'raw_rows': len(df_raw), 'curve_start': df_curve.index.min(), 'curve_end': df_curve.index.max(), 'curve_rows': len(df_curve)}])
show_table(one_row)
+---------------------+---------------------+------------+---------------------+---------------------+--------------+
| raw_start           | raw_end             |   raw_rows | curve_start         | curve_end           |   curve_rows |
|---------------------+---------------------+------------+---------------------+---------------------+--------------|
| 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |       9017 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |         9017 |
+---------------------+---------------------+------------+---------------------+---------------------+--------------+
Show code
# 3.X.2 Observation-calendar missingness summary
# Decompose missingness into calendar mismatch (dates absent from one index) versus
# true NaN gaps on shared dates, for the full tenor set and a stable core subset.
# Fix: the six index-level statistics do not depend on the tenor scope, so they are
# computed once instead of being recomputed identically on every loop iteration.
stable_tenors = [t for t in ['3_mo','6_mo','1_yr','2_yr','3_yr','5_yr','7_yr','10_yr'] if t in df_curve.columns]
# Loop-invariant calendar statistics.
n_dates_curve_obs = len(df_curve.index)
n_dates_raw_obs = len(df_raw.index)
overlap = df_curve.index.intersection(df_raw.index)
n_overlap_dates = len(overlap)
n_curve_only = len(df_curve.index.difference(df_raw.index))
n_raw_only = len(df_raw.index.difference(df_curve.index))
rows_all = []
for label, cols in [('all', df_curve.columns.tolist()), ('stable', stable_tenors)]:
    if not cols:
        continue
    # Average per-cell missingness over the scoped columns, in percent.
    missing_pct_on_curve_index = float(df_curve[cols].isna().mean().mean() * 100.0)
    missing_pct_on_overlap_only = float(df_curve.loc[overlap, cols].isna().mean().mean() * 100.0) if len(overlap) else np.nan
    rows_all.append({'scope': label, 'n_dates_curve_obs': n_dates_curve_obs, 'n_dates_raw_obs': n_dates_raw_obs, 'n_overlap_dates': n_overlap_dates, 'n_curve_only_dates': n_curve_only, 'n_raw_only_dates': n_raw_only, 'missing_pct_on_curve_index': missing_pct_on_curve_index, 'missing_pct_on_overlap_only': missing_pct_on_overlap_only})
table_decomp = pd.DataFrame(rows_all)
show_table(table_decomp)
table_decomp.to_parquet(derived_dir / 'curve_missingness_summary.parquet', engine='pyarrow', index=False)
+---------+---------------------+-------------------+-------------------+----------------------+--------------------+------------------------------+-------------------------------+
| scope   |   n_dates_curve_obs |   n_dates_raw_obs |   n_overlap_dates |   n_curve_only_dates |   n_raw_only_dates |   missing_pct_on_curve_index |   missing_pct_on_overlap_only |
|---------+---------------------+-------------------+-------------------+----------------------+--------------------+------------------------------+-------------------------------|
| all     |                9017 |              9017 |              9017 |                    0 |                  0 |                    23.0089   |                     23.0089   |
| stable  |                9017 |              9017 |              9017 |                    0 |                  0 |                     0.015249 |                      0.015249 |
+---------+---------------------+-------------------+-------------------+----------------------+--------------------+------------------------------+-------------------------------+
Show code
# 3.X.3 Missing streaks for 20_yr and 30_yr
# Summarize contiguous missing-data runs on the long-end tenors and persist both the
# per-segment detail and a per-tenor summary. Fix: compute_missing_streaks was called
# twice per tenor (once for the tables, again for out_segments); compute once and reuse.
st_tenors = [t for t in ['20_yr','30_yr'] if t in df_curve.columns]
segments_by_tenor = {t: compute_missing_streaks(df_curve[t], df_curve.index) for t in st_tenors}
rows_summary = []
for t in st_tenors:
    seg = segments_by_tenor[t]
    if seg.empty:
        # Tenor has no missing data; nothing to tabulate.
        continue
    seg_sorted = seg.sort_values('length_days', ascending=False).reset_index(drop=True)
    show_table(seg_sorted.head(10))
    rows_summary.append({'tenor': t, 'total_missing_days': int(df_curve[t].isna().sum()), 'longest_missing_streak_days': int(seg_sorted['length_days'].max()), 'number_of_missing_streaks': int(len(seg_sorted))})
summary_df = pd.DataFrame(rows_summary)
show_table(summary_df)
# Long-format artifact keeps one row per segment, tagged with its tenor.
out_segments = {t: seg_df.assign(tenor=t) for t, seg_df in segments_by_tenor.items()}
out_list = pd.concat(out_segments.values(), ignore_index=True) if out_segments else pd.DataFrame([])
out_list.to_parquet(derived_dir / 'curve_missing_streaks_long_end.parquet', engine='pyarrow', index=False)
summary_df.to_parquet(derived_dir / 'curve_missing_streaks_long_end_summary.parquet', engine='pyarrow', index=False)
+---------------------+---------------------+---------------+------------------------+
| segment_start       | segment_end         |   length_days |   length_business_days |
|---------------------+---------------------+---------------+------------------------|
| 1990-01-02 00:00:00 | 1993-09-30 00:00:00 |          1368 |                    939 |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 |
+---------------------+---------------------+---------------+------------------------+
+---------------------+---------------------+---------------+------------------------+
| segment_start       | segment_end         |   length_days |   length_business_days |
|---------------------+---------------------+---------------+------------------------|
| 2002-02-19 00:00:00 | 2006-02-08 00:00:00 |          1451 |                    994 |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 |
+---------------------+---------------------+---------------+------------------------+
+---------+----------------------+-------------------------------+-----------------------------+
| tenor   |   total_missing_days |   longest_missing_streak_days |   number_of_missing_streaks |
|---------+----------------------+-------------------------------+-----------------------------|
| 20_yr   |                  940 |                          1368 |                           2 |
| 30_yr   |                  995 |                          1451 |                           2 |
+---------+----------------------+-------------------------------+-----------------------------+
Show code
# 3.X.4 Universe feasibility table using overlap dates
# For each candidate universe, measure the span and share of overlap dates on which
# every member tenor is simultaneously non-null.
idx_overlap = df_curve.index.intersection(df_raw.index)
rows = []
for name, members in candidate_universes.items():
    cols = [c for c in members if c in df_curve.columns]
    n_cols = len(cols)
    if n_cols == 0:
        # No member tenor exists in the panel; emit an explicit infeasible row.
        rows.append({'universe': name, 'n_cols': 0, 'first_date_all_non_null': None, 'last_date_all_non_null': None, 'n_days_all_non_null': 0, 'share_of_overlap_days': 0.0, 'missing_pct_on_overlap': np.nan})
        continue
    sub = df_curve.loc[idx_overlap, cols]
    # Dates with a complete cross-section across all member tenors.
    mask_all = sub.notna().all(axis=1)
    if mask_all.any():
        first_all = mask_all[mask_all].index[0]
        last_all = mask_all[mask_all].index[-1]
        n_all = int(mask_all.sum())
    else:
        first_all = None
        last_all = None
        n_all = 0
    share = float(n_all / len(idx_overlap)) if len(idx_overlap) else np.nan
    # Average per-cell missingness over member tenors, in percent.
    missing_pct = float(sub.isna().mean().mean() * 100.0)
    rows.append({'universe': name, 'n_cols': n_cols, 'first_date_all_non_null': first_all, 'last_date_all_non_null': last_all, 'n_days_all_non_null': n_all, 'share_of_overlap_days': share, 'missing_pct_on_overlap': missing_pct})
feasibility = pd.DataFrame(rows)
show_table(feasibility)
feasibility.to_parquet(derived_dir / 'curve_universe_feasibility.parquet', engine='pyarrow', index=False)
+-------------+----------+---------------------------+--------------------------+-----------------------+-------------------------+--------------------------+
| universe    |   n_cols | first_date_all_non_null   | last_date_all_non_null   |   n_days_all_non_null |   share_of_overlap_days |   missing_pct_on_overlap |
|-------------+----------+---------------------------+--------------------------+-----------------------+-------------------------+--------------------------|
| U_core_8    |        8 | 1990-01-02 00:00:00       | 2026-01-16 00:00:00      |                  9013 |                0.999556 |                 0.015249 |
| U_core_9    |        9 | 1990-01-02 00:00:00       | 2026-01-16 00:00:00      |                  8019 |                0.88932  |                 1.23963  |
| U_core_10   |       10 | 1993-10-01 00:00:00       | 2026-01-16 00:00:00      |                  7080 |                0.785184 |                 2.15815  |
| U_short_end |        5 | 2001-07-31 00:00:00       | 2026-01-16 00:00:00      |                  6114 |                0.678053 |                 6.44782  |
+-------------+----------+---------------------------+--------------------------+-----------------------+-------------------------+--------------------------+
Show code
# 3.X.5 Decision note table for next step
def _recommend_universe(n_cols: int, share: float) -> tuple[str, str]:
    # Classify a universe by breadth (tenor count) and complete-cross-section coverage.
    if n_cols >= 8 and share >= 0.9:
        return 'yes', 'stable history and minimal true gaps'
    if n_cols >= 5 and share >= 0.7:
        return 'yes', 'usable with caveats'
    return 'no', 'insufficient overlap or missingness'

rows = []
for _, r in feasibility.iterrows():
    coverage = float(r['share_of_overlap_days']) if pd.notna(r['share_of_overlap_days']) else 0.0
    rec, reason = _recommend_universe(int(r['n_cols']), coverage)
    rows.append({'universe': r['universe'], 'recommended_for_main_backtest': rec, 'reason': reason})
curve_universe_recommendation = pd.DataFrame(rows)
show_table(curve_universe_recommendation)
curve_universe_recommendation.to_parquet(derived_dir / 'curve_universe_recommendation.parquet', engine='pyarrow', index=False)
# write small summary json for README generation
summary = {'canonical_path': str(curve_path), 'stable_universes': curve_universe_recommendation.loc[curve_universe_recommendation['recommended_for_main_backtest']=='yes','universe'].tolist(), 'artifacts': ['availability_era_group_summary.parquet','curve_missingness_summary.parquet','curve_missing_streaks_long_end.parquet','curve_missing_streaks_long_end_summary.parquet','curve_universe_feasibility.parquet','curve_universe_recommendation.parquet']}
with open(derived_dir / 'dataset_status_summary.json','w') as f:
    json.dump(summary, f, default=str)
+-------------+---------------------------------+--------------------------------------+
| universe    | recommended_for_main_backtest   | reason                               |
|-------------+---------------------------------+--------------------------------------|
| U_core_8    | yes                             | stable history and minimal true gaps |
| U_core_9    | yes                             | usable with caveats                  |
| U_core_10   | yes                             | usable with caveats                  |
| U_short_end | no                              | insufficient overlap or missingness  |
+-------------+---------------------------------+--------------------------------------+

4. Backtest specification and time axis

Show code
# 4.1 Imports and paths
# Section 4 defines the backtest time axis and conventions; no trading assumptions yet.

# Derived artifacts from earlier sections are read from and written to this directory.
derived_dir = Path("data/derived")
derived_dir.mkdir(parents=True, exist_ok=True)
Show code
# 4.2 Load canonical curve and define main universe
# Universe is a set of curve tenors (curve points), not a tradable instrument universe.
curve_path = derived_dir / "curve_treasury_par_canonical.parquet"
if not curve_path.exists():
    raise FileNotFoundError(curve_path)


def _load_dated_parquet(path: Path) -> pd.DataFrame:
    # Read a parquet panel and normalize its date index: naive timestamps, sorted.
    frame = pd.read_parquet(path)
    if "date" in frame.columns:
        frame["date"] = pd.to_datetime(frame["date"], errors="coerce")
        frame = frame.set_index("date")
    frame.index = ensure_naive_dates(frame.index)
    return frame.sort_index()


df_curve = _load_dated_parquet(curve_path)

universe_name = "U_core_8"
tenors = ["3_mo", "6_mo", "1_yr", "2_yr", "3_yr", "5_yr", "7_yr", "10_yr"]

# Abort if the canonical panel lacks any required curve point.
missing_tenors = [t for t in tenors if t not in df_curve.columns]
if missing_tenors:
    raise AssertionError(f"Missing tenors in canonical curve: {missing_tenors}")

universe_summary = pd.DataFrame(
    [
        {
            "universe_name": universe_name,
            "tenors_used": ",".join(tenors),
            "n_tenors": int(len(tenors)),
        }
    ]
)
show_table(universe_summary)
+-----------------+------------------------------------------+------------+
| universe_name   | tenors_used                              |   n_tenors |
|-----------------+------------------------------------------+------------|
| U_core_8        | 3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr |          8 |
+-----------------+------------------------------------------+------------+
Show code
# 4.3 Define the trading calendar using overlap publish dates
# Use overlap between raw and canonical curves to avoid calendar mismatches and hidden data gaps.
# Reuse df_raw if an earlier cell already defined it, else locate a raw parquet under data/.
try:
    df_raw
    _ = df_raw
except NameError:
    candidates = list(Path("data").rglob("*treasury*par*.parquet"))
    if not candidates:
        raise FileNotFoundError("raw treasury par curve parquet not found")
    # NOTE(review): first rglob match wins; order is filesystem-dependent -- confirm
    # only one raw parquet exists under data/.
    df_raw = pd.read_parquet(candidates[0])
    if "date" in df_raw.columns:
        df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
        df_raw = df_raw.set_index("date")
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()

idx_curve = df_curve.index
idx_raw = df_raw.index
# The trading calendar is the set of dates present in both panels.
idx_overlap = idx_curve.intersection(idx_raw)

calendar_summary = pd.DataFrame(
    [
        {
            "n_curve_dates": int(len(idx_curve)),
            "n_raw_dates": int(len(idx_raw)),
            "n_overlap_dates": int(len(idx_overlap)),
            "n_curve_only_dates": int(len(idx_curve.difference(idx_raw))),
            "n_raw_only_dates": int(len(idx_raw.difference(idx_curve))),
        }
    ]
)
show_table(calendar_summary)
+-----------------+---------------+-------------------+----------------------+--------------------+
|   n_curve_dates |   n_raw_dates |   n_overlap_dates |   n_curve_only_dates |   n_raw_only_dates |
|-----------------+---------------+-------------------+----------------------+--------------------|
|            9017 |          9017 |              9017 |                    0 |                  0 |
+-----------------+---------------+-------------------+----------------------+--------------------+
Show code
# 4.4 Define the effective backtest sample window for the chosen universe
# Require all tenors present to avoid implicit interpolation; this is a data-viability screen.
df_overlap = df_curve.loc[idx_overlap, tenors].copy()
mask_all = df_overlap.notna().all(axis=1)
complete_dates = df_overlap.index[mask_all]
if len(complete_dates):
    first_date_all_non_null = complete_dates[0]
    last_date_all_non_null = complete_dates[-1]
else:
    first_date_all_non_null = None
    last_date_all_non_null = None

n_days_all_non_null = int(mask_all.sum())
share_of_overlap_days = float(n_days_all_non_null / len(idx_overlap)) if len(idx_overlap) else np.nan

window_summary = pd.DataFrame(
    [
        {
            "first_date_all_non_null": first_date_all_non_null,
            "last_date_all_non_null": last_date_all_non_null,
            "n_days_all_non_null": n_days_all_non_null,
            "share_of_overlap_days": share_of_overlap_days,
        }
    ]
)
show_table(window_summary)
+---------------------------+--------------------------+-----------------------+-------------------------+
| first_date_all_non_null   | last_date_all_non_null   |   n_days_all_non_null |   share_of_overlap_days |
|---------------------------+--------------------------+-----------------------+-------------------------|
| 1990-01-02 00:00:00       | 2026-01-16 00:00:00      |                  9013 |                0.999556 |
+---------------------------+--------------------------+-----------------------+-------------------------+
Show code
# 4.5 Define duration mapping and PnL proxy conventions
# Duration proxy uses modified duration derived from each day's yields with par-bond assumptions.
# build_duration_panel is defined earlier in the file; it produces one duration series per tenor.
duration_panel = build_duration_panel(df_overlap, tenors)
duration_panel_path = derived_dir / "duration_panel.parquet"
duration_panel.to_parquet(duration_panel_path, engine="pyarrow", index=True)

# Summarize each tenor's duration distribution (median, mean, 5th/95th percentiles).
duration_rows = []
for tenor in tenors:
    maturity_years = tenor_to_years(tenor)
    s = duration_panel[tenor].dropna()
    duration_rows.append(
        {
            "tenor": tenor,
            "maturity_years": maturity_years,
            # The median serves as the representative duration figure.
            "duration_years": float(s.median()) if len(s) else np.nan,
            "duration_mean": float(s.mean()) if len(s) else np.nan,
            "duration_p05": float(s.quantile(0.05)) if len(s) else np.nan,
            "duration_p95": float(s.quantile(0.95)) if len(s) else np.nan,
            "method": "par bond modified duration from yields",
            "rationale": "Approximate modified duration from same-day yields",
        }
    )

durations = pd.DataFrame(duration_rows)
durations_path = derived_dir / "duration_assumptions.parquet"
durations.to_parquet(durations_path, engine="pyarrow", index=False)
show_table(durations)

# Record sign/unit conventions for the PnL proxy so downstream code is unambiguous.
pnl_convention = pd.DataFrame(
    [
        {
            "yields_units": "percent",
            "dy_units": "percent points",
            "dy_decimal": "dy / 100",
            "pnl_proxy": "sum(position_t * -duration * dy_decimal_t, where dy_decimal_t = (y_t - y_{t-1})/100)",
            "interpretation": "approximate price return per unit notional",
        }
    ]
)
show_table(pnl_convention)
+---------+------------------+------------------+-----------------+----------------+----------------+----------------------------------------+----------------------------------------------------+
| tenor   |   maturity_years |   duration_years |   duration_mean |   duration_p05 |   duration_p95 | method                                 | rationale                                          |
|---------+------------------+------------------+-----------------+----------------+----------------+----------------------------------------+----------------------------------------------------|
| 3_mo    |             0.25 |         0.243404 |        0.243326 |       0.235938 |       0.249925 | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 6_mo    |             0.5  |         0.485437 |        0.486113 |       0.470633 |       0.4997   | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 1_yr    |             1    |         0.969838 |        0.971307 |       0.940734 |       0.998901 | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 2_yr    |             2    |         1.91933  |        1.92266  |       1.8396   |       1.99401  | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 3_yr    |             3    |         2.82211  |        2.83128  |       2.65903  |       2.98119  | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 5_yr    |             5    |         4.52437  |        4.52962  |       4.10228  |       4.89607  | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 7_yr    |             7    |         6.0702   |        6.06733  |       5.33071  |       6.70222  | par bond modified duration from yields | Approximate modified duration from same-day yields |
| 10_yr   |            10    |         8.10905  |        8.12744  |       6.85269  |       9.22582  | par bond modified duration from yields | Approximate modified duration from same-day yields |
+---------+------------------+------------------+-----------------+----------------+----------------+----------------------------------------+----------------------------------------------------+
+----------------+----------------+--------------+--------------------------------------------------------------------------------------+--------------------------------------------+
| yields_units   | dy_units       | dy_decimal   | pnl_proxy                                                                            | interpretation                             |
|----------------+----------------+--------------+--------------------------------------------------------------------------------------+--------------------------------------------|
| percent        | percent points | dy / 100     | sum(position_t * -duration * dy_decimal_t, where dy_decimal_t = (y_t - y_{t-1})/100) | approximate price return per unit notional |
+----------------+----------------+--------------+--------------------------------------------------------------------------------------+--------------------------------------------+
Show code
# 4.6 Define estimation and trading timing conventions to avoid lookahead
# Explicit timing conventions ensure signals are formed with information available at date t.
_timing_items = [
    ("information_set_for_fit", "data through date t"),
    ("weights_computed_at", "end of date t"),
    ("position_effective_date", "t (signals and weights use data through t minus 1)"),
    ("causality_shift_rule", "forward-fill refit outputs to daily and shift by 1 observation"),
    ("pnl_realized_on", "t using dv01 return from t minus 1 to t"),
    ("missing_data_policy", "do not trade when any required tenor missing"),
]
backtest_timing = pd.DataFrame([{"item": k, "value": v} for k, v in _timing_items])
show_table(backtest_timing)
+-------------------------+----------------------------------------------------------------+
| item                    | value                                                          |
|-------------------------+----------------------------------------------------------------|
| information_set_for_fit | data through date t                                            |
| weights_computed_at     | end of date t                                                  |
| position_effective_date | t (signals and weights use data through t minus 1)             |
| causality_shift_rule    | forward-fill refit outputs to daily and shift by 1 observation |
| pnl_realized_on         | t using dv01 return from t minus 1 to t                        |
| missing_data_policy     | do not trade when any required tenor missing                   |
+-------------------------+----------------------------------------------------------------+
Show code
# 4.7 Define parameter defaults for the next section
# Defaults are methodological choices; changing them can materially alter results.
backtest_params = pd.DataFrame(
    [
        # 756 obs ~ 3 business years of history; refit roughly monthly (21 obs).
        {"param": "pca_window_obs", "value": 756},
        {"param": "pca_refit_step_obs", "value": 21},
        # Stored as a JSON string so it survives parquet and JSON round trips.
        {"param": "butterfly_legs", "value": '["2_yr","5_yr","10_yr"]'},
        # Guardrail for the 3x3 butterfly solve: if the leg-restricted loading matrix is ill-conditioned,
        # keep prior weights instead of producing extreme leverage.
        {"param": "butterfly_max_cond", "value": 200.0},
        {"param": "butterfly_max_l1", "value": 20.0},
        {"param": "butterfly_max_abs", "value": 12.0},
        # Z-score lookback ~1 business year; enter at |z| >= 2, exit when z crosses 0.
        {"param": "z_window_obs", "value": 252},
        {"param": "entry_z", "value": 2.0},
        {"param": "exit_z", "value": 0.0},
        {"param": "max_holding_obs", "value": 60},
        {"param": "ridge", "value": 1e-6},
        {"param": "cost_per_turnover", "value": 1e-4},
    ]
)
show_table(backtest_params)
+--------------------+-------------------------+
| param              | value                   |
|--------------------+-------------------------|
| pca_window_obs     | 756                     |
| pca_refit_step_obs | 21                      |
| butterfly_legs     | ["2_yr","5_yr","10_yr"] |
| butterfly_max_cond | 200.0                   |
| butterfly_max_l1   | 20.0                    |
| butterfly_max_abs  | 12.0                    |
| z_window_obs       | 252                     |
| entry_z            | 2.0                     |
| exit_z             | 0.0                     |
| max_holding_obs    | 60                      |
| ridge              | 1e-06                   |
| cost_per_turnover  | 0.0001                  |
+--------------------+-------------------------+
Show code
# 4.8 Save a single backtest spec artifact for reproducibility
# Persist the spec so downstream analysis is reproducible and audit-friendly.

overlap_start_date = idx_overlap.min()
overlap_end_date = idx_overlap.max()

# Single source of truth for the backtest configuration: sample boundaries,
# timing conventions, and parameter defaults — all JSON-serializable.
spec = {
    "universe_name": universe_name,
    "tenors": tenors,
    "overlap_start_date": overlap_start_date.date().isoformat() if pd.notna(overlap_start_date) else None,
    "overlap_end_date": overlap_end_date.date().isoformat() if pd.notna(overlap_end_date) else None,
    "n_overlap_dates": int(len(idx_overlap)),
    "sample_start_all_non_null": first_date_all_non_null.date().isoformat()
    if pd.notna(first_date_all_non_null)
    else None,
    "sample_end_all_non_null": last_date_all_non_null.date().isoformat() if pd.notna(last_date_all_non_null) else None,
    "n_days_all_non_null": int(n_days_all_non_null),
    "durations_table_path": str(durations_path),
    "timing_conventions": backtest_timing.set_index("item")["value"].to_dict(),
    "parameter_defaults": backtest_params.set_index("param")["value"].to_dict(),
}

# Validate BEFORE persisting so a broken artifact is never written to disk
# (previously the JSON was dumped first and only then checked).
if "cost_per_turnover" not in spec["parameter_defaults"]:
    raise ValueError("Spec missing cost_per_turnover in parameter_defaults")

spec_path_json = derived_dir / "backtest_spec.json"
with open(spec_path_json, "w") as f:
    json.dump(spec, f, indent=2)

# Flat one-row mirror of the spec for parquet consumers; nested dicts are
# serialized as sorted JSON strings so the row is stable and diffable.
spec_row = {
    "universe_name": universe_name,
    "tenors": ",".join(tenors),
    "overlap_start_date": spec["overlap_start_date"],
    "overlap_end_date": spec["overlap_end_date"],
    "n_overlap_dates": spec["n_overlap_dates"],
    "sample_start_all_non_null": spec["sample_start_all_non_null"],
    "sample_end_all_non_null": spec["sample_end_all_non_null"],
    "n_days_all_non_null": spec["n_days_all_non_null"],
    "durations_table_path": spec["durations_table_path"],
    "timing_conventions": json.dumps(spec["timing_conventions"], sort_keys=True),
    "parameter_defaults": json.dumps(spec["parameter_defaults"], sort_keys=True),
}

spec_df = pd.DataFrame([spec_row])
spec_path_parquet = derived_dir / "backtest_spec.parquet"
spec_df.to_parquet(spec_path_parquet, engine="pyarrow", index=False)

# One-row confirmation that both artifacts landed on disk with the expected dates.
spec_summary = pd.DataFrame(
    [
        {
            "spec_json_written": spec_path_json.exists(),
            "spec_parquet_written": spec_path_parquet.exists(),
            "overlap_start_date": spec["overlap_start_date"],
            "overlap_end_date": spec["overlap_end_date"],
            "sample_start_all_non_null": spec["sample_start_all_non_null"],
            "sample_end_all_non_null": spec["sample_end_all_non_null"],
        }
    ]
)
show_table(spec_summary)
+---------------------+------------------------+----------------------+--------------------+-----------------------------+---------------------------+
| spec_json_written   | spec_parquet_written   | overlap_start_date   | overlap_end_date   | sample_start_all_non_null   | sample_end_all_non_null   |
|---------------------+------------------------+----------------------+--------------------+-----------------------------+---------------------------|
| True                | True                   | 1990-01-02           | 2026-01-16         | 1990-01-02                  | 2026-01-16                |
+---------------------+------------------------+----------------------+--------------------+-----------------------------+---------------------------+

5. Walk forward PCA and hedge ratio engine

Show code
# 5.1 Load backtest spec and canonical curve on overlap calendar
# Align curves on overlap dates; inputs are curve yields, not tradable instruments.

# Reload the spec written in section 4.8 so this section can run stand-alone.
spec_path = derived_dir / "backtest_spec.json"
if not spec_path.exists():
    raise FileNotFoundError(spec_path)

with open(spec_path, "r") as f:
    backtest_spec = json.load(f)

# Canonical treasury par curve produced upstream; hard requirement.
curve_path = derived_dir / "curve_treasury_par_canonical.parquet"
if not curve_path.exists():
    raise FileNotFoundError(curve_path)

df_curve = pd.read_parquet(curve_path)
if "date" in df_curve.columns:
    # Promote an explicit "date" column to the index, coercing bad values to NaT.
    df_curve["date"] = pd.to_datetime(df_curve["date"], errors="coerce")
    df_curve = df_curve.set_index("date")
df_curve.index = ensure_naive_dates(df_curve.index)
df_curve = df_curve.sort_index()

# Reuse df_raw if an earlier cell already defined it; otherwise search the data
# tree for the raw parquet. NOTE(review): candidates[0] takes the first rglob
# match, whose order is filesystem-dependent — confirm only one file matches.
try:
    df_raw
    _ = df_raw
except NameError:
    candidates = list(Path("data").rglob("*treasury*par*.parquet"))
    if not candidates:
        raise FileNotFoundError("raw treasury par curve parquet not found")
    df_raw = pd.read_parquet(candidates[0])
    if "date" in df_raw.columns:
        df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
        df_raw = df_raw.set_index("date")
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()

# Restrict to dates where both canonical and raw curves exist, then clip to the
# all-non-null sample window recorded in the spec.
idx_overlap = df_curve.index.intersection(df_raw.index)
tenors = list(backtest_spec["tenors"])
sample_start = pd.to_datetime(backtest_spec["sample_start_all_non_null"])
sample_end = pd.to_datetime(backtest_spec["sample_end_all_non_null"])
idx = idx_overlap[(idx_overlap >= sample_start) & (idx_overlap <= sample_end)]

df_yields = df_curve.loc[idx, tenors].copy()
# Drop any date where at least one tenor is missing.
df_yields = df_yields.dropna(axis=0, how="any")

# Duration assumptions table: path comes from the spec, with a conventional fallback.
durations_path = Path(backtest_spec.get("durations_table_path", derived_dir / "duration_assumptions.parquet"))
if not durations_path.exists():
    raise FileNotFoundError(durations_path)

durations = pd.read_parquet(durations_path)
duration_panel_path = derived_dir / "duration_panel.parquet"
if not duration_panel_path.exists():
    raise FileNotFoundError(duration_panel_path)
duration_panel = pd.read_parquet(duration_panel_path)
duration_panel.index = ensure_naive_dates(duration_panel.index)
# Align the duration panel's columns to the tenor universe (missing tenors become NaN).
duration_panel = duration_panel.reindex(columns=tenors)

# Duration scaled return proxies are statistical first-order price return proxies from yield changes, not realized trade PnL.
dy_decimal, duration_scaled_return_proxy = build_dv01_returns(df_yields, duration_panel)

# One-row summary of the aligned panel for a quick visual sanity check.
panel_summary = pd.DataFrame(
    [
        {
            "n_dates_yields": int(len(df_yields)),
            "n_dates_dv01": int(len(duration_scaled_return_proxy)),
            "start_date": df_yields.index.min(),
            "end_date": df_yields.index.max(),
            "n_tenors": int(len(tenors)),
        }
    ]
)
show_table(panel_summary)
+------------------+----------------+---------------------+---------------------+------------+
|   n_dates_yields |   n_dates_dv01 | start_date          | end_date            |   n_tenors |
|------------------+----------------+---------------------+---------------------+------------|
|             9013 |           9012 | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |          8 |
+------------------+----------------+---------------------+---------------------+------------+
Show code
# 5.2 Create refit schedule
# Refit schedule defines expanding vs rolling PCA windows to avoid lookahead bias.
params = backtest_spec["parameter_defaults"]
pca_refit_step_obs = int(params["pca_refit_step_obs"])
pca_window_obs = int(params["pca_window_obs"])
min_obs_expanding = 252

# Butterfly legs may arrive as a JSON string (parquet round trip) or a list.
raw_legs = params.get("butterfly_legs", '["2_yr","5_yr","10_yr"]')
butterfly_legs = json.loads(raw_legs) if isinstance(raw_legs, str) else list(raw_legs)

# Validate the leg specification before it is used anywhere downstream.
if len(butterfly_legs) != 3:
    raise ValueError(f"butterfly_legs must contain exactly 3 tenors, got {butterfly_legs!r}")
if len(set(butterfly_legs)) != 3:
    raise ValueError(f"butterfly_legs must contain 3 distinct tenors, got {butterfly_legs!r}")
unknown_legs = [t for t in butterfly_legs if t not in tenors]
if unknown_legs:
    raise ValueError(f"butterfly_legs contains tenors not in tenors list: {unknown_legs!r}. tenors={tenors!r}")

# Numerical guardrails for the butterfly weight solve.
butterfly_max_cond = float(params.get("butterfly_max_cond", 200.0))
butterfly_max_l1 = float(params.get("butterfly_max_l1", 20.0))
butterfly_max_abs = float(params.get("butterfly_max_abs", 12.0))

# Optional stability controls for PCA loadings across refits (off by default).
freeze_on_instability = bool(params.get("freeze_on_instability", False))
min_sim3 = float(params.get("min_sim3", 0.7))
min_gap23 = float(params.get("min_gap23", 0.0))

return_proxy_index = duration_scaled_return_proxy.index

def build_refit_schedule(mode: str) -> pd.DataFrame:
    """Build the PCA refit schedule for one windowing mode.

    Expanding windows always start at the first observation and grow through
    each refit date; rolling windows keep a fixed length of pca_window_obs.
    Reads module-level settings: return_proxy_index, min_obs_expanding,
    pca_window_obs, and pca_refit_step_obs.
    """
    if mode == "expanding":
        # Require at least min_obs_expanding observations before the first fit.
        first_fit_pos = max(min_obs_expanding - 1, 0)
    else:
        first_fit_pos = pca_window_obs - 1

    schedule_columns = [
        "refit_date",
        "mode",
        "window_start_date",
        "window_end_date",
        "n_obs_in_window",
        "refit_step_obs",
    ]
    n_total = len(return_proxy_index)
    if first_fit_pos >= n_total:
        # Not enough history for even a single fit.
        return pd.DataFrame(columns=schedule_columns)

    records = []
    prev_end_pos = None
    # Step directly over integer positions; refit_date is the window end label.
    for end_pos in range(first_fit_pos, n_total, pca_refit_step_obs):
        start_pos = 0 if mode == "expanding" else end_pos - pca_window_obs + 1
        step = end_pos - prev_end_pos if prev_end_pos is not None else None
        records.append(
            {
                "refit_date": return_proxy_index[end_pos],
                "mode": mode,
                "window_start_date": return_proxy_index[start_pos],
                "window_end_date": return_proxy_index[end_pos],
                "n_obs_in_window": int(end_pos - start_pos + 1),
                "refit_step_obs": int(step) if step is not None else None,
            }
        )
        prev_end_pos = end_pos

    schedule = pd.DataFrame(records)
    if "refit_step_obs" in schedule.columns and len(schedule):
        # Nullable integer dtype keeps the first (missing) step from forcing float.
        schedule["refit_step_obs"] = schedule["refit_step_obs"].astype("Int64")
    return schedule


schedule_expanding = build_refit_schedule("expanding")
schedule_rolling = build_refit_schedule("rolling")

# Preview the first and last few refits of each schedule, expanding first.
for _sched in (schedule_expanding, schedule_rolling):
    show_table(_sched.head(5))
    show_table(_sched.tail(5))

# Persist both schedules for downstream sections.
schedule_expanding.to_parquet(derived_dir / "pca_refit_schedule_expanding.parquet", engine="pyarrow", index=False)
schedule_rolling.to_parquet(derived_dir / "pca_refit_schedule_rolling.parquet", engine="pyarrow", index=False)
+---------------------+-----------+---------------------+---------------------+-------------------+------------------+
| refit_date          | mode      | window_start_date   | window_end_date     |   n_obs_in_window | refit_step_obs   |
|---------------------+-----------+---------------------+---------------------+-------------------+------------------|
| 1991-01-04 00:00:00 | expanding | 1990-01-03 00:00:00 | 1991-01-04 00:00:00 |               252 | <NA>             |
| 1991-02-05 00:00:00 | expanding | 1990-01-03 00:00:00 | 1991-02-05 00:00:00 |               273 | 21               |
| 1991-03-07 00:00:00 | expanding | 1990-01-03 00:00:00 | 1991-03-07 00:00:00 |               294 | 21               |
| 1991-04-08 00:00:00 | expanding | 1990-01-03 00:00:00 | 1991-04-08 00:00:00 |               315 | 21               |
| 1991-05-07 00:00:00 | expanding | 1990-01-03 00:00:00 | 1991-05-07 00:00:00 |               336 | 21               |
+---------------------+-----------+---------------------+---------------------+-------------------+------------------+
+---------------------+-----------+---------------------+---------------------+-------------------+------------------+
| refit_date          | mode      | window_start_date   | window_end_date     |   n_obs_in_window |   refit_step_obs |
|---------------------+-----------+---------------------+---------------------+-------------------+------------------|
| 2025-09-10 00:00:00 | expanding | 1990-01-03 00:00:00 | 2025-09-10 00:00:00 |              8925 |               21 |
| 2025-10-09 00:00:00 | expanding | 1990-01-03 00:00:00 | 2025-10-09 00:00:00 |              8946 |               21 |
| 2025-11-10 00:00:00 | expanding | 1990-01-03 00:00:00 | 2025-11-10 00:00:00 |              8967 |               21 |
| 2025-12-11 00:00:00 | expanding | 1990-01-03 00:00:00 | 2025-12-11 00:00:00 |              8988 |               21 |
| 2026-01-13 00:00:00 | expanding | 1990-01-03 00:00:00 | 2026-01-13 00:00:00 |              9009 |               21 |
+---------------------+-----------+---------------------+---------------------+-------------------+------------------+
+---------------------+---------+---------------------+---------------------+-------------------+------------------+
| refit_date          | mode    | window_start_date   | window_end_date     |   n_obs_in_window | refit_step_obs   |
|---------------------+---------+---------------------+---------------------+-------------------+------------------|
| 1993-01-11 00:00:00 | rolling | 1990-01-03 00:00:00 | 1993-01-11 00:00:00 |               756 | <NA>             |
| 1993-02-10 00:00:00 | rolling | 1990-02-02 00:00:00 | 1993-02-10 00:00:00 |               756 | 21               |
| 1993-03-12 00:00:00 | rolling | 1990-03-06 00:00:00 | 1993-03-12 00:00:00 |               756 | 21               |
| 1993-04-13 00:00:00 | rolling | 1990-04-04 00:00:00 | 1993-04-13 00:00:00 |               756 | 21               |
| 1993-05-12 00:00:00 | rolling | 1990-05-04 00:00:00 | 1993-05-12 00:00:00 |               756 | 21               |
+---------------------+---------+---------------------+---------------------+-------------------+------------------+
+---------------------+---------+---------------------+---------------------+-------------------+------------------+
| refit_date          | mode    | window_start_date   | window_end_date     |   n_obs_in_window |   refit_step_obs |
|---------------------+---------+---------------------+---------------------+-------------------+------------------|
| 2025-09-10 00:00:00 | rolling | 2022-08-31 00:00:00 | 2025-09-10 00:00:00 |               756 |               21 |
| 2025-10-09 00:00:00 | rolling | 2022-09-30 00:00:00 | 2025-10-09 00:00:00 |               756 |               21 |
| 2025-11-10 00:00:00 | rolling | 2022-11-01 00:00:00 | 2025-11-10 00:00:00 |               756 |               21 |
| 2025-12-11 00:00:00 | rolling | 2022-12-02 00:00:00 | 2025-12-11 00:00:00 |               756 |               21 |
| 2026-01-13 00:00:00 | rolling | 2023-01-04 00:00:00 | 2026-01-13 00:00:00 |               756 |               21 |
+---------------------+---------+---------------------+---------------------+-------------------+------------------+
Show code
# 5.3 Confirm PCA helpers and sign alignment utilities
# Sanity check that PCA utilities are available before running the engine.
_required_helpers = ("pca_svd", "align_signs", "match_and_orient_loadings")
helper_checks = pd.DataFrame(
    [{"helper": name, "available": name in globals()} for name in _required_helpers]
)
show_table(helper_checks)
+---------------------------+-------------+
| helper                    | available   |
|---------------------------+-------------|
| pca_svd                   | True        |
| align_signs               | True        |
| match_and_orient_loadings | True        |
+---------------------------+-------------+
Show code
# 5.4 Fit expanding PCA on each refit window and store loadings
# PCA is applied to duration scaled return proxies for statistical factor extraction, not tradeable assets.
# Long-format accumulators (one row per refit/pc/tenor combination as relevant).
expanding_loadings_rows = []
expanding_evr_rows = []
expanding_diag_rows = []
expanding_match_rows = []
expanding_means_rows = []
# Loadings from the previous refit; used for sign/permutation matching and freezing.
prev_loadings_exp = None

for _, row in schedule_expanding.iterrows():
    refit_date = row["refit_date"]
    window_start = row["window_start_date"]
    window_end = row["window_end_date"]
    # Label-based slicing is inclusive of both endpoints on the sorted index.
    window = duration_scaled_return_proxy.loc[window_start:window_end, tenors]
    # Per-tenor window means are persisted so downstream code can re-center consistently.
    mu = window.mean(axis=0)
    for tenor in tenors:
        expanding_means_rows.append(
            {"refit_date": refit_date, "tenor": tenor, "mean_return": float(mu[tenor])}
        )
    X = window.values
    Xc = X - mu.values
    # pca_svd returns (loadings, <unused>, explained-variance ratios) for k=3.
    # NOTE(review): middle return value is discarded — confirm helper contract.
    loadings_raw, _, evr = pca_svd(Xc, k=3)
    if prev_loadings_exp is None:
        # First refit has no prior to match against, so fix signs by convention.
        loadings_raw = align_signs(loadings_raw, tenors)
    perm_used, flip_flags, sims = match_and_orient_diagnostics(loadings_raw, prev_loadings_exp)
    loadings = match_and_orient_loadings(loadings_raw, prev_loadings_exp)
    perm = _parse_perm_used(perm_used, loadings_raw.shape[0])
    # Reorder EVR entries to follow the same component permutation as the loadings.
    evr = _apply_component_alignment(evr, perm)
    sim1, sim2, sim3 = (sims + [np.nan, np.nan, np.nan])[:3]  # pad so the unpack never fails
    # Total sample variance (ddof=1) used to convert EVR back into eigenvalue scale.
    total_var = float(np.nanvar(Xc, axis=0, ddof=1).sum())
    eigvals = np.full(3, np.nan)
    if np.isfinite(total_var) and total_var > 0 and len(evr) > 0:
        eigvals[: len(evr)] = np.array(evr) * total_var
    if prev_loadings_exp is None:
        # No prior loadings: similarity matrix is undefined at the first refit.
        sim_matrix = np.full((loadings_raw.shape[0], loadings_raw.shape[0]), np.nan)
        prev_loadings_snapshot = None
    else:
        sim_matrix = np.abs(loadings_raw @ prev_loadings_exp.T)
        prev_loadings_snapshot = prev_loadings_exp.copy()
    # Keep the aligned (pre-freeze) loadings for diagnostics even if we freeze below.
    aligned_pre_freeze = loadings.copy()
    gap12 = float(evr[0] - evr[1]) if len(evr) > 1 else np.nan
    gap23 = float(evr[1] - evr[2]) if len(evr) > 2 else np.nan
    freeze_event = False
    if freeze_on_instability and prev_loadings_exp is not None:
        # Freeze rule: keep prior loadings when PC3 alignment is weak or the
        # PC2/PC3 EVR gap is below the configured floor.
        sim3_abs = abs(sim3) if np.isfinite(sim3) else np.nan
        if (np.isfinite(sim3_abs) and sim3_abs < min_sim3) or (
            np.isfinite(gap23) and gap23 < min_gap23
        ):
            loadings = prev_loadings_exp.copy()
            freeze_event = True

    # Rich per-refit diagnostics (arrays kept as objects; consumed by 5.4.1).
    expanding_match_rows.append(
        {
            "refit_date": refit_date,
            "aligned_loadings": aligned_pre_freeze,
            "prev_loadings": prev_loadings_snapshot,
            "sim_matrix": sim_matrix,
            "perm_used": perm_used,
            "diag_sims": sims,
            "eigvals": eigvals,
        }
    )

    for pc_idx in range(3):
        pc_name = pc_idx + 1
        for t_idx, tenor in enumerate(tenors):
            expanding_loadings_rows.append(
                {
                    "refit_date": refit_date,
                    "pc": pc_name,
                    "tenor": tenor,
                    "loading": float(loadings[pc_idx, t_idx]),
                }
            )
        expanding_evr_rows.append(
            {"refit_date": refit_date, "pc": pc_name, "evr": float(evr[pc_idx])}
        )

    expanding_diag_rows.append(
        {
            "refit_date": refit_date,
            "sim1": sim1,
            "sim2": sim2,
            "sim3": sim3,
            "gap12": gap12,
            "gap23": gap23,
            "perm_used": perm_used,
            "flip_pc1": bool(flip_flags[0]) if len(flip_flags) > 0 else False,
            "flip_pc2": bool(flip_flags[1]) if len(flip_flags) > 1 else False,
            "flip_pc3": bool(flip_flags[2]) if len(flip_flags) > 2 else False,
            "freeze_event": freeze_event,
        }
    )

    # Only advance the reference loadings when this refit was NOT frozen.
    if not freeze_event:
        prev_loadings_exp = loadings.copy()

expanding_loadings = pd.DataFrame(expanding_loadings_rows)
expanding_evr = pd.DataFrame(expanding_evr_rows)
expanding_diag = pd.DataFrame(expanding_diag_rows)
expanding_match_diag = pd.DataFrame(expanding_match_rows)
expanding_means_refit = pd.DataFrame(expanding_means_rows)

# Display the most recent refit's loadings in wide (pc x tenor) form.
last_refit = expanding_loadings["refit_date"].max()
last_wide = (
    expanding_loadings.loc[expanding_loadings["refit_date"] == last_refit]
    .pivot(index="pc", columns="tenor", values="loading")
    .reset_index()
)
show_table(last_wide)

expanding_loadings.to_parquet(derived_dir / "pca_loadings_expanding.parquet", engine="pyarrow", index=False)
expanding_evr.to_parquet(derived_dir / "pca_evr_expanding.parquet", engine="pyarrow", index=False)
expanding_diag.to_parquet(derived_dir / "pca_stability_diag_expanding.parquet", engine="pyarrow", index=False)
expanding_means_refit.to_parquet(derived_dir / "pca_means_refit_expanding.parquet", engine="pyarrow", index=False)
+------+----------+------------+-----------+-------------+-----------+------------+------------+------------+
|   pc |    10_yr |       1_yr |      2_yr |        3_mo |      3_yr |       5_yr |       6_mo |       7_yr |
|------+----------+------------+-----------+-------------+-----------+------------+------------+------------|
|    1 | 0.690216 |  0.0430584 |  0.13349  |  0.00472578 |  0.222853 |  0.398216  |  0.0139514 |  0.543573  |
|    2 | 0.562266 | -0.173028  | -0.417164 | -0.0269844  | -0.523709 | -0.444499  | -0.0650224 | -0.0555507 |
|    3 | 0.455006 |  0.176894  |  0.314053 |  0.0346987  |  0.314466 | -0.0793547 |  0.0781562 | -0.741991  |
+------+----------+------------+-----------+-------------+-----------+------------+------------+------------+
Show code
# 5.4.1 PCA stability diagnostics (PC3 alignment)
# Fallback: if section 5.4's in-memory diagnostics are unavailable, rebuild a
# reduced version from the persisted loadings/EVR parquet files.
if "expanding_match_diag" not in globals() or expanding_match_diag is None or len(expanding_match_diag) == 0:
    if "expanding_loadings" not in globals():
        expanding_loadings = pd.read_parquet(
            derived_dir / "pca_loadings_expanding.parquet", engine="pyarrow"
        )
    if "expanding_evr" not in globals():
        expanding_evr = pd.read_parquet(derived_dir / "pca_evr_expanding.parquet", engine="pyarrow")

    loadings_wide = (
        expanding_loadings.pivot(index=["refit_date", "pc"], columns="tenor", values="loading")
        .sort_index()
        .reset_index()
    )
    evr_wide = expanding_evr.pivot(index="refit_date", columns="pc", values="evr").sort_index()
    refit_dates = sorted(loadings_wide["refit_date"].unique())
    tmp_rows = []
    for i, refit_date in enumerate(refit_dates):
        # (3, n_tenors) loadings for this refit, rows ordered PC1..PC3.
        now = (
            loadings_wide.loc[loadings_wide["refit_date"] == refit_date]
            .set_index("pc")
            .loc[[1, 2, 3], tenors]
            .values
        )
        prev = None
        sim_matrix = np.full((3, 3), np.nan)
        if i > 0:
            prev_date = refit_dates[i - 1]
            prev = (
                loadings_wide.loc[loadings_wide["refit_date"] == prev_date]
                .set_index("pc")
                .loc[[1, 2, 3], tenors]
                .values
            )
            sim_matrix = np.abs(now @ prev.T)
        # NOTE(review): this fallback stores raw EVR values under "eigvals",
        # while 5.4 stores EVR * total variance. gap23 below is relative, so
        # the scale difference cancels — but the naming is misleading; confirm.
        eigvals = np.full(3, np.nan)
        if refit_date in evr_wide.index:
            evr_row = evr_wide.loc[refit_date].reindex([1, 2, 3]).values
            eigvals[: len(evr_row)] = evr_row

        tmp_rows.append(
            {
                "refit_date": refit_date,
                "aligned_loadings": now,
                "prev_loadings": prev,
                "sim_matrix": sim_matrix,
                "eigvals": eigvals,
            }
        )
    expanding_match_diag = pd.DataFrame(tmp_rows)

# Per-refit stability metrics: sim3 (PC3 alignment with previous refit),
# sep3 (margin between the best and second-best PC3 match), and gap23
# (relative spacing between components 2 and 3).
diag_rows = []
for _, row in expanding_match_diag.iterrows():
    refit_date = row["refit_date"]
    aligned = row["aligned_loadings"]
    prev = row["prev_loadings"]
    sim_matrix = row["sim_matrix"]
    eigvals = row["eigvals"]
    if prev is None:
        # First refit has no predecessor; nothing to compare against.
        continue
    sim3 = np.nan
    if aligned is not None and prev is not None:
        # |cosine| between this refit's PC3 and the previous refit's PC3.
        sim3 = abs(float(np.dot(aligned[2], prev[2])))
    sep3 = np.nan
    if sim_matrix is not None:
        # Column 2 = similarities of every current component to the previous PC3.
        col = np.asarray(sim_matrix)[:, 2]
        col = col[np.isfinite(col)]
        if len(col) >= 2:
            ordered = np.sort(col)[::-1]
            sep3 = float(ordered[0] - ordered[1])
    eig2 = eigvals[1] if eigvals is not None and len(eigvals) > 1 else np.nan
    eig3 = eigvals[2] if eigvals is not None and len(eigvals) > 2 else np.nan
    # Relative gap, guarded against division by ~0.
    gap23 = (eig2 - eig3) / max(abs(eig2), 1e-12) if np.isfinite(eig2) else np.nan
    diag_rows.append({"refit_date": refit_date, "sim3": sim3, "sep3": sep3, "gap23": gap23})

diag_df = pd.DataFrame(diag_rows).sort_values("refit_date")
n_refits = int(len(diag_df))

sim3_valid = diag_df["sim3"].dropna()
sep3_valid = diag_df["sep3"].dropna()
gap23_valid = diag_df["gap23"].dropna()

# Headline pass rates against fixed thresholds (NaN when no valid observations).
frac_sim3 = float((sim3_valid >= 0.8).mean()) if len(sim3_valid) else np.nan
frac_sep3 = float((sep3_valid >= 0.05).mean()) if len(sep3_valid) else np.nan
frac_gap23 = float((gap23_valid <= 0.02).mean()) if len(gap23_valid) else np.nan

print("PCA stability diagnostics (expanding)")
print(f"refits evaluated: {n_refits}")
print(f"fraction sim3 >= 0.8: {frac_sim3:.2%}" if np.isfinite(frac_sim3) else "fraction sim3 >= 0.8: n/a")
print(f"fraction sep3 >= 0.05: {frac_sep3:.2%}" if np.isfinite(frac_sep3) else "fraction sep3 >= 0.05: n/a")
print(f"fraction gap23 <= 0.02: {frac_gap23:.2%}" if np.isfinite(frac_gap23) else "fraction gap23 <= 0.02: n/a")

# Show the ten weakest refits under each metric for manual inspection.
worst_sim3 = diag_df.dropna(subset=["sim3"]).nsmallest(10, "sim3")
worst_sep3 = diag_df.dropna(subset=["sep3"]).nsmallest(10, "sep3")
worst_gap23 = diag_df.dropna(subset=["gap23"]).nsmallest(10, "gap23")

print("\nworst sim3")
print(worst_sim3[["refit_date", "sim3"]].to_string(index=False))
print("\nworst sep3")
print(worst_sep3[["refit_date", "sep3"]].to_string(index=False))
print("\nsmallest gap23")
print(worst_gap23[["refit_date", "gap23"]].to_string(index=False))

# Optional cross-check against daily backtest weights, if a later section has
# defined them. NOTE(review): sampling daily turnover AT the refit date prints
# ~0 in the observed output — if weights update the observation after the
# refit, this readout misses the actual jump; confirm intended alignment.
if "weights_bt" in globals() and isinstance(weights_bt, pd.DataFrame):
    turnover_daily = weights_bt.diff().abs().sum(axis=1)
    refit_turnover = turnover_daily.reindex(diag_df["refit_date"]).dropna()
    if len(refit_turnover):
        top_turnover = refit_turnover.sort_values(ascending=False).head(10)
        print("\nlargest refit turnover (weights_bt)")
        print(top_turnover.to_string())

# Lazy-import matplotlib so the diagnostics still run in plot-less environments.
plot_ready = "plt" in globals()
if not plot_ready:
    try:
        import matplotlib.pyplot as plt

        plot_ready = True
    except Exception:
        plot_ready = False

if plot_ready and len(diag_df):
    fig, ax = plt.subplots()
    ax.plot(diag_df["refit_date"], diag_df["sim3"])
    ax.set_title("PC3 alignment similarity (sim3)")
    ax.set_xlabel("refit_date")
    ax.set_ylabel("sim3")
    fig.autofmt_xdate()
    plt.show()


def solve_pca_neutral_butterfly_weights(
    loadings_matrix: np.ndarray,
    tenors: list[str],
    legs: list[str],
    ridge: float,
    tol: float = 1e-8,
) -> np.ndarray:
    """
    Solve a 3 leg PCA neutral butterfly on (PC1, PC2, PC3) using only the chosen legs.

    The three leg weights are chosen so the portfolio has zero exposure to PC1
    and PC2 and unit exposure to PC3:

        PC1 exposure = 0
        PC2 exposure = 0
        PC3 exposure = 1

    Parameters
    ----------
    loadings_matrix : (3, n_tenors) array whose rows are PC1..PC3 loadings.
    tenors : column labels for loadings_matrix.
    legs : exactly three distinct tenors used as butterfly legs.
    ridge : regularization used only by the fallback minimum-norm solve.
    tol : maximum allowed absolute residual on the PC constraints.

    Returns
    -------
    Weight vector of length len(tenors); entries outside the legs are zero.

    Raises
    ------
    ValueError on malformed inputs or when the solved weights violate the
    constraints by more than tol.
    """
    lm = np.asarray(loadings_matrix, dtype="float64")
    if lm.shape[0] != 3:
        raise ValueError(f"loadings_matrix must have 3 rows (PC1..PC3), got shape {lm.shape}")
    if lm.shape[1] != len(tenors):
        raise ValueError(f"loadings_matrix has {lm.shape[1]} columns but tenors has {len(tenors)} entries")
    if len(legs) != 3 or len(set(legs)) != 3:
        raise ValueError(f"legs must be 3 distinct tenors, got {legs!r}")

    leg_positions = [tenors.index(t) for t in legs]
    leg_block = lm[:, leg_positions]  # 3x3: PC rows restricted to the leg columns
    target = np.array([0.0, 0.0, 1.0], dtype="float64")

    # Exact solve when the 3x3 leg system is well-conditioned; otherwise fall
    # back to a ridge-stabilized minimum-norm solution.
    try:
        cond_number = float(np.linalg.cond(leg_block))
    except Exception:
        cond_number = float("inf")

    leg_weights = None
    if np.isfinite(cond_number) and cond_number <= 1e8:
        try:
            leg_weights = np.linalg.solve(leg_block, target)
        except np.linalg.LinAlgError:
            leg_weights = None

    if leg_weights is None:
        gram = leg_block @ leg_block.T
        leg_weights = leg_block.T @ np.linalg.solve(gram + float(ridge) * np.eye(3), target)

    # Scatter the leg weights back into the full tenor vector.
    w_full = np.zeros(len(tenors), dtype="float64")
    w_full[leg_positions] = leg_weights

    # Verify the PC-neutrality constraints actually hold for the full vector.
    resid = lm @ w_full - target
    worst = float(np.max(np.abs(resid)))
    if not np.all(np.isfinite(resid)) or worst > tol:
        raise ValueError(
            "PCA neutral butterfly constraint check failed "
            f"(legs={legs!r}, max_abs_resid={worst:.3e}, resid={resid})"
        )
    return w_full
PCA stability diagnostics (expanding)
refits evaluated: 417
fraction sim3 >= 0.8: 100.00%
fraction sep3 >= 0.05: 100.00%
fraction gap23 <= 0.02: 0.00%

worst sim3
refit_date     sim3
2009-02-26 0.995937
1992-02-07 0.996442
1991-11-05 0.996632
1991-02-05 0.997319
1991-06-06 0.997709
1991-03-07 0.997771
1991-05-07 0.998145
1991-08-06 0.998246
1993-05-12 0.998601
1992-07-09 0.998613

worst sep3
refit_date     sep3
1991-02-05 0.963603
1991-03-07 0.970893
2008-12-23 0.981430
1993-02-10 0.983161
1998-10-23 0.985436
1991-05-07 0.985810
2000-08-25 0.986064
1992-01-08 0.988770
1991-06-06 0.989882
1991-07-08 0.989970

smallest gap23
refit_date    gap23
2000-01-27 0.650806
1999-12-28 0.651066
1999-11-26 0.651982
2000-02-28 0.652117
2000-03-28 0.652614
1999-10-26 0.653112
2000-08-25 0.653806
2000-04-27 0.654359
2000-12-27 0.655052
1999-09-24 0.655202

largest refit turnover (weights_bt)
refit_date
1991-02-05    0.0
2008-08-19    0.0
2015-01-12    0.0
2014-12-10    0.0
2014-11-07    0.0
2014-10-08    0.0
2014-09-09    0.0
2014-08-08    0.0
2014-07-10    0.0
2014-06-10    0.0

Show code
# 5.5 Solve hedge weights at each refit date from loadings (expanding)
# Weights are factor-neutralization coefficients, not an implementable bond portfolio.
ridge = float(params["ridge"])
butterfly_leg_idx = [tenors.index(t) for t in butterfly_legs]

expanding_weights_rows = []
turnover_rows = []
# Previous refit's weight vector; reused when a solve is frozen by a guardrail.
prev_w = None

for refit_date in schedule_expanding["refit_date"].tolist():
    # Rebuild the (3, N) loadings matrix for this refit, rows ordered PC1..PC3.
    sub = expanding_loadings.loc[expanding_loadings["refit_date"] == refit_date]
    loadings_wide = sub.pivot(index="pc", columns="tenor", values="loading").loc[[1, 2, 3], tenors]
    loadings_matrix = loadings_wide.values.astype("float64")  # (3, N)
    A_leg = loadings_matrix[:, butterfly_leg_idx]
    cond_leg = float(np.linalg.cond(A_leg))
    weights_frozen = False
    freeze_reason = "ok"
    l1_leg = np.nan
    max_abs_leg = np.nan
    # Guardrail 1: ill-conditioned leg system -> keep previous weights.
    if (not np.isfinite(cond_leg)) or (cond_leg > butterfly_max_cond):
        if prev_w is None:
            # No prior weights exist yet, so there is nothing safe to fall back to.
            raise ValueError(
                f"Butterfly solve is ill-conditioned at first refit (refit_date={refit_date}, cond={cond_leg:.3e})"
            )
        w = prev_w.copy()
        weights_frozen = True
        freeze_reason = "cond"
        # Report leverage stats of the carried-over weights, not the failed solve.
        w_leg_used = w[butterfly_leg_idx]
        l1_leg = float(np.abs(w_leg_used).sum())
        max_abs_leg = float(np.abs(w_leg_used).max())
    else:
        w_candidate = solve_pca_neutral_butterfly_weights(loadings_matrix, tenors, butterfly_legs, ridge=ridge)
        w_leg_candidate = w_candidate[butterfly_leg_idx]
        l1_leg = float(np.abs(w_leg_candidate).sum())
        max_abs_leg = float(np.abs(w_leg_candidate).max())
        # Guardrail 2: candidate weights exceed leverage caps -> keep previous weights.
        if (l1_leg > butterfly_max_l1) or (max_abs_leg > butterfly_max_abs):
            if prev_w is None:
                raise ValueError(
                    f"Butterfly solve exceeds weight caps at first refit (refit_date={refit_date})"
                )
            w = prev_w.copy()
            weights_frozen = True
            freeze_reason = "weight_cap"
        else:
            w = w_candidate

    # Long-format weight rows, one per tenor per refit.
    for t_idx, tenor in enumerate(tenors):
        expanding_weights_rows.append(
            {"refit_date": refit_date, "tenor": tenor, "weight": float(w[t_idx])}
        )

    # Half-L1 turnover convention; undefined (NaN) at the first refit.
    turnover = 0.5 * float(np.abs(w - prev_w).sum()) if prev_w is not None else np.nan
    turnover_rows.append(
        {
            "refit_date": refit_date,
            "turnover": turnover,
            "cond_leg": cond_leg,
            "l1_leg": l1_leg,
            "max_abs_leg": max_abs_leg,
            "freeze_event": weights_frozen,
            "freeze_reason": freeze_reason,
        }
    )
    prev_w = w.copy()

expanding_weights_refit = pd.DataFrame(expanding_weights_rows)
expanding_turnover = pd.DataFrame(turnover_rows)

show_table(expanding_turnover.tail(10))

expanding_weights_refit.to_parquet(
    derived_dir / "pca_weights_refit_expanding.parquet", engine="pyarrow", index=False
)
expanding_turnover.to_parquet(
    derived_dir / "pca_turnover_expanding.parquet", engine="pyarrow", index=False
)

# Human-readable CSV copy of the refit diagnostics for the report outputs.
section5_dir = Path("outputs/section_05")
section5_dir.mkdir(parents=True, exist_ok=True)
exp_diag_path = section5_dir / "weight_refit_diagnostics_expanding.csv"
expanding_turnover.to_csv(exp_diag_path, index=False)
+---------------------+-------------+------------+----------+---------------+----------------+-----------------+
| refit_date          |    turnover |   cond_leg |   l1_leg |   max_abs_leg | freeze_event   | freeze_reason   |
|---------------------+-------------+------------+----------+---------------+----------------+-----------------|
| 2025-04-09 00:00:00 | 0.000835135 |    3.02445 |  4.107   |       2.17912 | False          | ok              |
| 2025-05-09 00:00:00 | 0.000667535 |    3.02435 |  4.10754 |       2.17872 | False          | ok              |
| 2025-06-10 00:00:00 | 0.000865126 |    3.02516 |  4.10802 |       2.17983 | False          | ok              |
| 2025-07-11 00:00:00 | 0.000619673 |    3.02618 |  4.10926 |       2.18047 | False          | ok              |
| 2025-08-11 00:00:00 | 0.00119574  |    3.02413 |  4.10711 |       2.17839 | False          | ok              |
| 2025-09-10 00:00:00 | 0.00125707  |    3.02223 |  4.1046  |       2.1767  | False          | ok              |
| 2025-10-09 00:00:00 | 0.00281423  |    3.01827 |  4.09897 |       2.17414 | False          | ok              |
| 2025-11-10 00:00:00 | 0.00018179  |    3.01814 |  4.09889 |       2.17429 | False          | ok              |
| 2025-12-11 00:00:00 | 0.00229289  |    3.02171 |  4.10348 |       2.17681 | False          | ok              |
| 2026-01-13 00:00:00 | 0.0020956   |    3.01858 |  4.09928 |       2.17446 | False          | ok              |
+---------------------+-------------+------------+----------+---------------+----------------+-----------------+
Show code
# 5.6 Expand refit loadings and weights to daily trade dates with 1 day shift (expanding)
# Shift by one observation enforces causality: weights known through date t minus 1 are applied on date t.
weights_wide = expanding_weights_refit.pivot(index="refit_date", columns="tenor", values="weight").sort_index()
weights_wide = weights_wide.reindex(columns=tenors).fillna(0.0)
weights_daily, weights_refit_used = apply_causality_shift(
    weights_wide, duration_scaled_return_proxy.index, "expanding_weights"
)

weights_daily.to_parquet(
    derived_dir / "pca_weights_daily_expanding.parquet", engine="pyarrow", index=True
)

weights_daily_export = weights_daily[butterfly_legs].copy()
weights_daily_export = weights_daily_export.rename(columns={t: f"w_{t}" for t in butterfly_legs})
weights_daily_export = weights_daily_export.reset_index()
# reset_index() names the new column after the index (or "index" when unnamed);
# normalize either way so the CSV always carries a "date" column.
if "index" in weights_daily_export.columns:
    weights_daily_export = weights_daily_export.rename(columns={"index": "date"})
elif weights_daily_export.columns[0] != "date":
    weights_daily_export = weights_daily_export.rename(columns={weights_daily_export.columns[0]: "date"})
weights_daily_export["variant"] = "expanding"
weights_daily_export.to_csv(section5_dir / "weights_daily_expanding.csv", index=False)

loadings_wide = (
    expanding_loadings.pivot(index="refit_date", columns=["pc", "tenor"], values="loading").sort_index()
)
loadings_daily, loadings_refit_used = apply_causality_shift(
    loadings_wide, duration_scaled_return_proxy.index, "expanding_loadings"
)

# future_stack=True silences the pandas>=2.1 FutureWarning on stack(); the explicit
# dropna() reproduces the legacy stack's behavior of dropping NaN rows
# (e.g. trade dates before the first usable refit).
loadings_daily_long = (
    loadings_daily.stack(level=[0, 1], future_stack=True).dropna().reset_index()
)
loadings_daily_long.columns = ["trade_date", "pc", "tenor", "loading"]

loadings_daily_long.to_parquet(
    derived_dir / "pca_loadings_daily_expanding.parquet", engine="pyarrow", index=False
)

means_wide = (
    expanding_means_refit.pivot(index="refit_date", columns="tenor", values="mean_return").sort_index()
)
means_wide = means_wide.reindex(columns=tenors)
means_daily, means_refit_used = apply_causality_shift(
    means_wide, duration_scaled_return_proxy.index, "expanding_means"
)
means_daily.to_parquet(
    derived_dir / "pca_means_daily_expanding.parquet", engine="pyarrow", index=True
)

# Bug fix: reset_index() names the column after the index ("date" here), so the old
# rename(columns={"index": "trade_date"}) silently did nothing and the preview showed
# "date". Rename the first column explicitly regardless of the index name.
weights_preview = weights_daily[butterfly_legs].head(5).reset_index()
weights_preview = weights_preview.rename(columns={weights_preview.columns[0]: "trade_date"})
show_table(weights_preview)
+---------------------+---------+----------+----------+
| date                |    2_yr |     5_yr |    10_yr |
|---------------------+---------+----------+----------|
| 1991-01-07 00:00:00 | 2.78064 | -2.14971 | 0.624491 |
| 1991-01-08 00:00:00 | 2.78064 | -2.14971 | 0.624491 |
| 1991-01-09 00:00:00 | 2.78064 | -2.14971 | 0.624491 |
| 1991-01-10 00:00:00 | 2.78064 | -2.14971 | 0.624491 |
| 1991-01-11 00:00:00 | 2.78064 | -2.14971 | 0.624491 |
+---------------------+---------+----------+----------+
/var/folders/8b/dng58k4174jcldvgrm6q_mzr0000gn/T/ipykernel_12823/2787367712.py:30: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.
  loadings_daily_long = loadings_daily.stack(level=[0, 1]).reset_index()
Show code
# 5.7 Plot expanding turnover and weight paths
# Visual diagnostics only; does not imply tradeability of the curve points.
import matplotlib.pyplot as plt
%matplotlib inline

section5_dir = Path("outputs/section_05")
section5_dir.mkdir(parents=True, exist_ok=True)

# The plotted turnover series below is refit-to-refit turnover on PCA weights (not daily strategy turnover).
turnover_refit_expanding = expanding_turnover[["refit_date", "turnover"]].rename(
    columns={"turnover": "turnover_refit"}
)
turnover_refit_expanding.to_csv(section5_dir / "turnover_refit_expanding.csv", index=False)

fig, ax = plt.subplots()
ax.plot(expanding_turnover["refit_date"], expanding_turnover["turnover"])
ax.set_title("Expanding PCA turnover")
ax.set_xlabel("refit_date")
ax.set_ylabel("turnover")
fig.autofmt_xdate()
fig.savefig(section5_dir / "fig_turnover_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

fig, ax = plt.subplots()
for t in butterfly_legs:
    ax.plot(weights_daily.index, weights_daily[t], label=t)
ax.set_title("Expanding PCA neutral butterfly weights (3 legs)")
ax.set_xlabel("date")
ax.set_ylabel("weight")
ax.legend(ncol=2, fontsize=8)
fig.autofmt_xdate()
fig.savefig(section5_dir / "fig_weights_paths_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

Show code
# 5.8.1 Fit rolling PCA on each refit window and store loadings
# Rolling PCA keeps a fixed historical window to reduce regime drift effects.
# Long-format accumulators: one row per (refit_date, pc, tenor) for loadings,
# per (refit_date, pc) for explained variance, per refit_date for diagnostics.
rolling_loadings_rows = []
rolling_evr_rows = []
rolling_diag_rows = []
rolling_means_rows = []
# Loadings from the previously *accepted* refit; anchor for sign/order matching.
prev_loadings_roll = None

for _, row in schedule_rolling.iterrows():
    refit_date = row["refit_date"]
    window_start = row["window_start_date"]
    window_end = row["window_end_date"]
    # Fixed-length window of duration-scaled returns (label-based slice is inclusive).
    window = duration_scaled_return_proxy.loc[window_start:window_end, tenors]
    mu = window.mean(axis=0)
    # Persist per-tenor window means so the daily expansion step can demean causally.
    for tenor in tenors:
        rolling_means_rows.append(
            {"refit_date": refit_date, "tenor": tenor, "mean_return": float(mu[tenor])}
        )
    X = window.values
    # Center columns before PCA.
    Xc = X - mu.values
    # pca_svd presumably returns (loadings, scores/singular values, explained-variance
    # ratios) for the top k components — helper defined earlier in the file; verify.
    loadings_raw, _, evr = pca_svd(Xc, k=3)
    if prev_loadings_roll is None:
        # First refit: fix component signs by convention (no previous fit to match).
        loadings_raw = align_signs(loadings_raw, tenors)
    # Match components to the previous accepted fit (permutation + sign flips) and
    # collect per-component similarity diagnostics.
    perm_used, flip_flags, sims = match_and_orient_diagnostics(loadings_raw, prev_loadings_roll)
    loadings = match_and_orient_loadings(loadings_raw, prev_loadings_roll)
    perm = _parse_perm_used(perm_used, loadings_raw.shape[0])
    # Reorder the explained-variance ratios to follow the permuted component order.
    evr = _apply_component_alignment(evr, perm)
    # Pad so sim1..sim3 always exist even if fewer similarities were reported.
    sim1, sim2, sim3 = (sims + [np.nan, np.nan, np.nan])[:3]
    # EVR gaps: a small gap means the adjacent components are poorly separated.
    gap12 = float(evr[0] - evr[1]) if len(evr) > 1 else np.nan
    gap23 = float(evr[1] - evr[2]) if len(evr) > 2 else np.nan
    freeze_event = False
    if freeze_on_instability and prev_loadings_roll is not None:
        sim3_abs = abs(sim3) if np.isfinite(sim3) else np.nan
        # Freeze (reuse previous loadings) when PC3 looks unstable: low similarity to
        # the previous PC3, or a near-degenerate 2-3 eigenvalue gap.
        # NOTE(review): on a freeze the stored loadings come from the previous fit,
        # while evr/sim/gap diagnostics still describe the current (rejected) fit.
        if (np.isfinite(sim3_abs) and sim3_abs < min_sim3) or (
            np.isfinite(gap23) and gap23 < min_gap23
        ):
            loadings = prev_loadings_roll.copy()
            freeze_event = True

    for pc_idx in range(3):
        # 1-based component label for output tables.
        pc_name = pc_idx + 1
        for t_idx, tenor in enumerate(tenors):
            rolling_loadings_rows.append(
                {
                    "refit_date": refit_date,
                    "pc": pc_name,
                    "tenor": tenor,
                    "loading": float(loadings[pc_idx, t_idx]),
                }
            )
        rolling_evr_rows.append(
            {"refit_date": refit_date, "pc": pc_name, "evr": float(evr[pc_idx])}
        )

    rolling_diag_rows.append(
        {
            "refit_date": refit_date,
            "sim1": sim1,
            "sim2": sim2,
            "sim3": sim3,
            "gap12": gap12,
            "gap23": gap23,
            "perm_used": perm_used,
            "flip_pc1": bool(flip_flags[0]) if len(flip_flags) > 0 else False,
            "flip_pc2": bool(flip_flags[1]) if len(flip_flags) > 1 else False,
            "flip_pc3": bool(flip_flags[2]) if len(flip_flags) > 2 else False,
            "freeze_event": freeze_event,
        }
    )

    # Only advance the matching anchor on accepted fits so a frozen (rejected) fit
    # cannot drift the reference over time.
    if not freeze_event:
        prev_loadings_roll = loadings.copy()

rolling_loadings = pd.DataFrame(rolling_loadings_rows)
rolling_evr = pd.DataFrame(rolling_evr_rows)
rolling_diag = pd.DataFrame(rolling_diag_rows)
rolling_means_refit = pd.DataFrame(rolling_means_rows)

# Preview: loadings from the most recent refit, pivoted wide for display.
last_refit_rolling = rolling_loadings["refit_date"].max()
last_wide_rolling = (
    rolling_loadings.loc[rolling_loadings["refit_date"] == last_refit_rolling]
    .pivot(index="pc", columns="tenor", values="loading")
    .reset_index()
)
show_table(last_wide_rolling)

rolling_loadings.to_parquet(derived_dir / "pca_loadings_rolling.parquet", engine="pyarrow", index=False)
rolling_evr.to_parquet(derived_dir / "pca_evr_rolling.parquet", engine="pyarrow", index=False)
rolling_diag.to_parquet(derived_dir / "pca_stability_diag_rolling.parquet", engine="pyarrow", index=False)
rolling_means_refit.to_parquet(derived_dir / "pca_means_refit_rolling.parquet", engine="pyarrow", index=False)
+------+----------+------------+-----------+-------------+-----------+-----------+------------+------------+
|   pc |    10_yr |       1_yr |      2_yr |        3_mo |      3_yr |      5_yr |       6_mo |       7_yr |
|------+----------+------------+-----------+-------------+-----------+-----------+------------+------------|
|    1 | 0.665066 |  0.0538192 |  0.161945 |  0.00325043 |  0.251863 |  0.413094 |  0.0134843 |  0.542485  |
|    2 | 0.575223 | -0.204522  | -0.461453 | -0.0117627  | -0.513663 | -0.383365 | -0.0562221 | -0.0152798 |
|    3 | 0.453698 |  0.270425  |  0.448855 |  0.0239472  |  0.245861 | -0.324612 |  0.0956926 | -0.586521  |
+------+----------+------------+-----------+-------------+-----------+-----------+------------+------------+
Show code
# 5.8.2 Solve hedge weights at each refit date from loadings (rolling)
# Rolling weights are still statistical constructs, not tradable allocations.
roll_weight_records = []
roll_diag_records = []
prev_w = None

for refit_date in schedule_rolling["refit_date"].tolist():
    # Loadings for this refit as a (3, N) matrix ordered PC1..PC3 x tenors.
    mask = rolling_loadings["refit_date"] == refit_date
    pivoted = rolling_loadings.loc[mask].pivot(index="pc", columns="tenor", values="loading")
    loadings_matrix = pivoted.loc[[1, 2, 3], tenors].values.astype("float64")
    cond_leg = float(np.linalg.cond(loadings_matrix[:, butterfly_leg_idx]))

    frozen, reason = False, "ok"
    l1_leg = max_abs_leg = np.nan

    if (not np.isfinite(cond_leg)) or (cond_leg > butterfly_max_cond):
        # Ill-conditioned leg system: carry the previous weights forward.
        if prev_w is None:
            raise ValueError(
                f"Butterfly solve is ill-conditioned at first refit (refit_date={refit_date}, cond={cond_leg:.3e})"
            )
        w = prev_w.copy()
        frozen, reason = True, "cond"
        # Diagnostics reflect the (frozen) weights actually used.
        leg_abs = np.abs(w[butterfly_leg_idx])
        l1_leg, max_abs_leg = float(leg_abs.sum()), float(leg_abs.max())
    else:
        candidate = solve_pca_neutral_butterfly_weights(loadings_matrix, tenors, butterfly_legs, ridge=ridge)
        # Diagnostics here reflect the candidate that triggered (or passed) the caps.
        leg_abs = np.abs(candidate[butterfly_leg_idx])
        l1_leg, max_abs_leg = float(leg_abs.sum()), float(leg_abs.max())
        if (l1_leg > butterfly_max_l1) or (max_abs_leg > butterfly_max_abs):
            # Candidate breaches the leverage caps: keep the previous weights.
            if prev_w is None:
                raise ValueError(
                    f"Butterfly solve exceeds weight caps at first refit (refit_date={refit_date})"
                )
            w = prev_w.copy()
            frozen, reason = True, "weight_cap"
        else:
            w = candidate

    roll_weight_records.extend(
        {"refit_date": refit_date, "tenor": tenor, "weight": float(w[k])}
        for k, tenor in enumerate(tenors)
    )

    # Half-L1 distance between consecutive weight vectors; undefined at the first refit.
    turnover = np.nan if prev_w is None else 0.5 * float(np.abs(w - prev_w).sum())
    roll_diag_records.append(
        {
            "refit_date": refit_date,
            "turnover": turnover,
            "cond_leg": cond_leg,
            "l1_leg": l1_leg,
            "max_abs_leg": max_abs_leg,
            "freeze_event": frozen,
            "freeze_reason": reason,
        }
    )
    prev_w = w.copy()

rolling_weights_refit = pd.DataFrame(roll_weight_records)
rolling_turnover = pd.DataFrame(roll_diag_records)

show_table(rolling_turnover.tail(10))

rolling_weights_refit.to_parquet(
    derived_dir / "pca_weights_refit_rolling.parquet", engine="pyarrow", index=False
)
rolling_turnover.to_parquet(
    derived_dir / "pca_turnover_rolling.parquet", engine="pyarrow", index=False
)

# Wide refit-level weights (missing tenors filled with zero exposure).
rolling_weights_wide_refit = (
    rolling_weights_refit.pivot(index="refit_date", columns="tenor", values="weight")
    .sort_index()
    .reindex(columns=tenors)
    .fillna(0.0)
)
rolling_weight_diag = rolling_turnover.merge(
    rolling_weights_wide_refit[butterfly_legs].rename(columns={t: f"w_{t}" for t in butterfly_legs}),
    left_on="refit_date",
    right_index=True,
    how="left",
)
roll_diag_path = section5_dir / "weight_refit_diagnostics_rolling.csv"
rolling_weight_diag.to_csv(roll_diag_path, index=False)

# Audit that every scheduled refit produced a diagnostics row.
refit_schedule_rolling = schedule_rolling[["refit_date"]].copy()
refit_schedule_rolling = refit_schedule_rolling.rename(columns={"refit_date": "expected_refit_date"})
refit_schedule_rolling["has_refit_row"] = refit_schedule_rolling["expected_refit_date"].isin(
    rolling_turnover["refit_date"]
)
refit_schedule_rolling.to_csv(section5_dir / "refit_schedule_rolling.csv", index=False)
+---------------------+------------+------------+----------+---------------+----------------+-----------------+
| refit_date          |   turnover |   cond_leg |   l1_leg |   max_abs_leg | freeze_event   | freeze_reason   |
|---------------------+------------+------------+----------+---------------+----------------+-----------------|
| 2025-04-09 00:00:00 | 0.0142502  |    1.71512 |  2.44731 |       1.18411 | False          | ok              |
| 2025-05-09 00:00:00 | 0.0491689  |    1.78992 |  2.54565 |       1.22367 | False          | ok              |
| 2025-06-10 00:00:00 | 0.0115078  |    1.77719 |  2.52263 |       1.21585 | False          | ok              |
| 2025-07-11 00:00:00 | 0.00268511 |    1.7876  |  2.52393 |       1.21919 | False          | ok              |
| 2025-08-11 00:00:00 | 0.00697592 |    1.78312 |  2.50998 |       1.20877 | False          | ok              |
| 2025-09-10 00:00:00 | 0.0309889  |    1.83055 |  2.57196 |       1.23508 | False          | ok              |
| 2025-10-09 00:00:00 | 0.0445744  |    1.76077 |  2.48281 |       1.19906 | False          | ok              |
| 2025-11-10 00:00:00 | 0.0219465  |    1.79549 |  2.5267  |       1.21982 | False          | ok              |
| 2025-12-11 00:00:00 | 0.00665076 |    1.78548 |  2.5134  |       1.21384 | False          | ok              |
| 2026-01-13 00:00:00 | 0.00917039 |    1.76907 |  2.49506 |       1.20354 | False          | ok              |
+---------------------+------------+------------+----------+---------------+----------------+-----------------+
Show code
# 5.8.3 Expand refit loadings and weights to daily trade dates with 1 day shift (rolling)
# Apply the same one-day lag to preserve a realistic information set.
rolling_weights_wide = rolling_weights_wide_refit
rolling_weights_daily, rolling_weights_refit_used = apply_causality_shift(
    rolling_weights_wide, duration_scaled_return_proxy.index, "rolling_weights"
)

rolling_weights_daily.to_parquet(
    derived_dir / "pca_weights_daily_rolling.parquet", engine="pyarrow", index=True
)

rolling_weights_export = rolling_weights_daily[butterfly_legs].copy()
rolling_weights_export = rolling_weights_export.rename(columns={t: f"w_{t}" for t in butterfly_legs})
rolling_weights_export = rolling_weights_export.reset_index()
# reset_index() names the new column after the index (or "index" when unnamed);
# normalize either way so the CSV always carries a "date" column.
if "index" in rolling_weights_export.columns:
    rolling_weights_export = rolling_weights_export.rename(columns={"index": "date"})
elif rolling_weights_export.columns[0] != "date":
    rolling_weights_export = rolling_weights_export.rename(columns={rolling_weights_export.columns[0]: "date"})
rolling_weights_export["variant"] = "rolling"
rolling_weights_export.to_csv(section5_dir / "weights_daily_rolling.csv", index=False)

# Detect long stretches where daily leg weights do not move (potential stale/frozen weights).
flat_eps = 1e-12
rolling_leg_daily = rolling_weights_daily[butterfly_legs].copy()
daily_delta = rolling_leg_daily.diff().abs().max(axis=1)
flat_mask = daily_delta < flat_eps
flat_mask = flat_mask.fillna(False)


def _summarize_flat_segment(seg_start, seg_end):
    """Build a diagnostics record for a flat-weight interval [seg_start, seg_end].

    Counts refit dates inside the interval and summarizes freeze diagnostics so a
    long flat stretch can be attributed to freezes, weight caps, or genuine stability.
    Reads flat_mask, schedule_rolling and rolling_weight_diag from the enclosing scope.
    """
    n_days = int(flat_mask.loc[seg_start:seg_end].shape[0])
    refits_in_interval = schedule_rolling[
        (schedule_rolling["refit_date"] >= seg_start)
        & (schedule_rolling["refit_date"] <= seg_end)
    ]
    diag_in_interval = rolling_weight_diag[
        (rolling_weight_diag["refit_date"] >= seg_start)
        & (rolling_weight_diag["refit_date"] <= seg_end)
    ]
    freeze_rate = (
        float(diag_in_interval["freeze_event"].mean()) if len(diag_in_interval) else np.nan
    )
    reason_modes = diag_in_interval["freeze_reason"].mode() if len(diag_in_interval) else None
    reason_mode = (
        reason_modes.iloc[0] if reason_modes is not None and len(reason_modes) else None
    )

    def _col_max(col):
        # Max of a diagnostics column; NaN when the interval holds no refits.
        return float(diag_in_interval[col].max()) if len(diag_in_interval) else np.nan

    return {
        "start_date": seg_start,
        "end_date": seg_end,
        "n_days": n_days,
        "overlaps_refit_dates": int(len(refits_in_interval)),
        "freeze_rate_in_interval": freeze_rate,
        "most_common_freeze_reason": reason_mode,
        "max_cond_leg_in_interval": _col_max("cond_leg"),
        "max_l1_leg_in_interval": _col_max("l1_leg"),
        "max_max_abs_leg_in_interval": _col_max("max_abs_leg"),
    }


# Scan for maximal runs of flat days; keep only runs of at least 60 business days.
# (The summarization previously appeared twice — loop body and tail — now one helper.)
flat_segments = []
current_start = None
prev_date = None

for date, is_flat in flat_mask.items():
    if is_flat and current_start is None:
        current_start = date
    if (not is_flat) and current_start is not None:
        record = _summarize_flat_segment(current_start, prev_date)
        if record["n_days"] >= 60:
            flat_segments.append(record)
        current_start = None
    prev_date = date

# Close a segment that is still open at the end of the sample.
if current_start is not None:
    record = _summarize_flat_segment(current_start, flat_mask.index[-1])
    if record["n_days"] >= 60:
        flat_segments.append(record)

flat_segments_df = pd.DataFrame(
    flat_segments,
    columns=[
        "start_date",
        "end_date",
        "n_days",
        "overlaps_refit_dates",
        "freeze_rate_in_interval",
        "most_common_freeze_reason",
        "max_cond_leg_in_interval",
        "max_l1_leg_in_interval",
        "max_max_abs_leg_in_interval",
    ],
)
flat_segments_df.to_csv(section5_dir / "rolling_flat_segments.csv", index=False)

rolling_loadings_wide = (
    rolling_loadings.pivot(index="refit_date", columns=["pc", "tenor"], values="loading").sort_index()
)
rolling_loadings_daily, rolling_loadings_refit_used = apply_causality_shift(
    rolling_loadings_wide, duration_scaled_return_proxy.index, "rolling_loadings"
)

# future_stack=True silences the pandas>=2.1 FutureWarning on stack(); the explicit
# dropna() reproduces the legacy stack's behavior of dropping NaN rows.
rolling_loadings_daily_long = (
    rolling_loadings_daily.stack(level=[0, 1], future_stack=True).dropna().reset_index()
)
rolling_loadings_daily_long.columns = ["trade_date", "pc", "tenor", "loading"]

rolling_loadings_daily_long.to_parquet(
    derived_dir / "pca_loadings_daily_rolling.parquet", engine="pyarrow", index=False
)

rolling_means_wide = (
    rolling_means_refit.pivot(index="refit_date", columns="tenor", values="mean_return").sort_index()
)
rolling_means_wide = rolling_means_wide.reindex(columns=tenors)
rolling_means_daily, rolling_means_refit_used = apply_causality_shift(
    rolling_means_wide, duration_scaled_return_proxy.index, "rolling_means"
)
rolling_means_daily.to_parquet(
    derived_dir / "pca_means_daily_rolling.parquet", engine="pyarrow", index=True
)
/var/folders/8b/dng58k4174jcldvgrm6q_mzr0000gn/T/ipykernel_12823/3235275485.py:137: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.
  rolling_loadings_daily_long = rolling_loadings_daily.stack(level=[0, 1]).reset_index()
Show code
# 5.8.4 Plot rolling turnover and weight paths
# Plots help assess stability and turnover, not execution feasibility.
# The plotted turnover series below is refit-to-refit turnover on PCA weights (not daily strategy turnover).
turnover_refit_rolling = rolling_turnover[["refit_date", "turnover"]].rename(
    columns={"turnover": "turnover_refit"}
)
turnover_refit_rolling.to_csv(section5_dir / "turnover_refit_rolling.csv", index=False)

# Turnover at each refit date.
fig_turn, ax_turn = plt.subplots()
ax_turn.plot(rolling_turnover["refit_date"], rolling_turnover["turnover"])
ax_turn.set_title("Rolling PCA turnover")
ax_turn.set_xlabel("refit_date")
ax_turn.set_ylabel("turnover")
fig_turn.autofmt_xdate()
fig_turn.savefig(section5_dir / "fig_turnover_rolling.png", dpi=150, bbox_inches="tight")
plt.show()

# Daily weight paths for the three butterfly legs.
fig_w, ax_w = plt.subplots()
for leg in butterfly_legs:
    ax_w.plot(rolling_weights_daily.index, rolling_weights_daily[leg], label=leg)
ax_w.set_title("Rolling PCA neutral butterfly weights (3 legs)")
ax_w.set_xlabel("date")
ax_w.set_ylabel("weight")
ax_w.legend(ncol=2, fontsize=8)
fig_w.autofmt_xdate()
fig_w.savefig(section5_dir / "fig_weights_paths_rolling.png", dpi=150, bbox_inches="tight")
plt.show()

Show code
# 5.9 Sanity checks for causality and stability
# Verify timing and turnover profiles for basic causality and stability checks.
def summarize_turnover(turnover_df: pd.DataFrame) -> dict:
    """Summarize a turnover table: mean and 95th percentile of `turnover`.

    Returns NaN for both statistics when the table has no rows.
    """
    if not len(turnover_df):
        return {"average_turnover": np.nan, "p95_turnover": np.nan}
    turnover = turnover_df["turnover"]
    return {
        "average_turnover": float(turnover.mean()),
        "p95_turnover": float(turnover.quantile(0.95)),
    }


# Earliest refit dates and first/last tradeable dates per mode; trading should
# start strictly after the first refit (no look-ahead).
exp_first_refit = schedule_expanding["refit_date"].min()
roll_first_refit = schedule_rolling["refit_date"].min()
exp_first_trade = weights_daily.index.min() if len(weights_daily) else None
roll_first_trade = rolling_weights_daily.index.min() if len(rolling_weights_daily) else None
exp_last_trade = weights_daily.index.max() if len(weights_daily) else None
roll_last_trade = rolling_weights_daily.index.max() if len(rolling_weights_daily) else None

exp_turn = summarize_turnover(expanding_turnover)
roll_turn = summarize_turnover(rolling_turnover)

# Causality spot check at the middle refit: find that refit's window end and
# the first trade date strictly after the refit.
mid_refit_exp = schedule_expanding.iloc[len(schedule_expanding) // 2]["refit_date"]
mid_window_end = schedule_expanding.loc[
    schedule_expanding["refit_date"] == mid_refit_exp, "window_end_date"
].iloc[0]
# NOTE(review): .min() on an empty DatetimeIndex returns NaT, not None, so the
# `is not None` guard below would not catch an empty selection — confirm intent.
mid_trade_date = weights_daily.loc[weights_daily.index > mid_refit_exp].index.min()

sanity_rows = [
    {
        "mode": "expanding",
        "first_refit_date": exp_first_refit,
        "first_trade_date_with_weights": exp_first_trade,
        "last_trade_date_with_weights": exp_last_trade,
        "average_turnover": exp_turn["average_turnover"],
        "p95_turnover": exp_turn["p95_turnover"],
    },
    {
        "mode": "rolling",
        "first_refit_date": roll_first_refit,
        "first_trade_date_with_weights": roll_first_trade,
        "last_trade_date_with_weights": roll_last_trade,
        "average_turnover": roll_turn["average_turnover"],
        "p95_turnover": roll_turn["p95_turnover"],
    },
    {
        # The turnover columns are repurposed in this row: average_turnover is a
        # 0/1 flag for mid_trade_date > mid_window_end (1.0 = causal ordering
        # holds), and last_trade_date_with_weights carries the window end.
        "mode": "expanding_causality_check",
        "first_refit_date": mid_refit_exp,
        "first_trade_date_with_weights": mid_trade_date,
        "last_trade_date_with_weights": mid_window_end,
        "average_turnover": float(mid_trade_date > mid_window_end) if mid_trade_date is not None else np.nan,
        "p95_turnover": np.nan,
    },
]

sanity_checks = pd.DataFrame(sanity_rows)
show_table(sanity_checks)
# Persist the audit trail for later sections.
sanity_checks.to_parquet(derived_dir / "pca_engine_sanity_checks.parquet", engine="pyarrow", index=False)

# Per-mode stability summary from the refit diagnostics: quantiles of sim3,
# the median gap23, and the freeze-event count when that column is tracked
# (sim3/gap23 semantics are defined where the diagnostics are built).
stability_summary_rows = []
for mode, diag in (("expanding", expanding_diag), ("rolling", rolling_diag)):
    if diag is None or len(diag) == 0:
        continue
    clean_sim3 = diag["sim3"].replace([np.inf, -np.inf], np.nan).dropna()
    row = {"mode": mode}
    if len(clean_sim3):
        row["sim3_p05"] = float(clean_sim3.quantile(0.05))
        row["sim3_median"] = float(clean_sim3.median())
    else:
        row["sim3_p05"] = np.nan
        row["sim3_median"] = np.nan
    # The empty-diag case was filtered out above, so this median is defined.
    row["gap23_median"] = float(diag["gap23"].median())
    row["n_freeze_events"] = int(diag["freeze_event"].sum()) if "freeze_event" in diag.columns else 0
    stability_summary_rows.append(row)

if stability_summary_rows:
    show_table(pd.DataFrame(stability_summary_rows))
+---------------------------+---------------------+---------------------------------+--------------------------------+--------------------+----------------+
| mode                      | first_refit_date    | first_trade_date_with_weights   | last_trade_date_with_weights   |   average_turnover |   p95_turnover |
|---------------------------+---------------------+---------------------------------+--------------------------------+--------------------+----------------|
| expanding                 | 1991-01-04 00:00:00 | 1991-01-07 00:00:00             | 2026-01-16 00:00:00            |          0.0627314 |       0.305502 |
| rolling                   | 1993-01-11 00:00:00 | 1993-01-12 00:00:00             | 2026-01-16 00:00:00            |          0.160707  |       0.576262 |
| expanding_causality_check | 2008-07-21 00:00:00 | 2008-07-22 00:00:00             | 2008-07-21 00:00:00            |          1         |     nan        |
+---------------------------+---------------------+---------------------------------+--------------------------------+--------------------+----------------+
+-----------+------------+---------------+----------------+-------------------+
| mode      |   sim3_p05 |   sim3_median |   gap23_median |   n_freeze_events |
|-----------+------------+---------------+----------------+-------------------|
| expanding |   0.999577 |      0.999998 |      0.0209219 |                 0 |
| rolling   |   0.996619 |      0.99984  |      0.0205231 |                 0 |
+-----------+------------+---------------+----------------+-------------------+

5.X Pre-Section 6 verification

Show code
# 5.X.1 Index integrity summary
# For each refit mode, reload the persisted daily weight/loading panels and
# check that their date indexes are unique and monotonically increasing.
modes = ["expanding", "rolling"]
rows_idx = []
for mode in modes:
    w_path = derived_dir / f"pca_weights_daily_{mode}.parquet"
    l_path = derived_dir / f"pca_loadings_daily_{mode}.parquet"
    w_exists = w_path.exists()
    l_exists = l_path.exists()
    if w_exists:
        w_df = pd.read_parquet(w_path)
        if isinstance(w_df.index, pd.DatetimeIndex):
            w_index_unique = bool(w_df.index.is_unique)
            w_index_mono = bool(w_df.index.is_monotonic_increasing)
            n_dates_w = int(len(w_df.index))
            first_w = w_df.index.min()
            last_w = w_df.index.max()
        else:
            # fallback if written with trade_date column
            if "trade_date" in w_df.columns:
                idx = pd.to_datetime(w_df["trade_date"], errors="coerce")
                w_index_unique = bool(idx.is_unique)
                w_index_mono = bool(idx.is_monotonic_increasing)
                n_dates_w = int(len(idx))
                first_w = idx.min()
                last_w = idx.max()
            else:
                # No usable date information; report conservative defaults.
                w_index_unique = False
                w_index_mono = False
                n_dates_w = int(len(w_df))
                first_w = None
                last_w = None
    else:
        # Missing file: flag everything False/empty rather than raising.
        w_index_unique = False
        w_index_mono = False
        n_dates_w = 0
        first_w = None
        last_w = None

    if l_exists:
        l_df = pd.read_parquet(l_path)
        if isinstance(l_df.index, pd.DatetimeIndex):
            l_index_unique = bool(l_df.index.is_unique)
            l_index_mono = bool(l_df.index.is_monotonic_increasing)
            # NOTE(review): this branch counts index rows, while the trade_date
            # branch below counts *unique* dates — confirm which definition of
            # n_dates_loadings is intended if loadings ever carry duplicates.
            n_dates_l = int(len(l_df.index))
        else:
            if "trade_date" in l_df.columns:
                idxl = pd.to_datetime(l_df["trade_date"], errors="coerce")
                l_index_unique = bool(idxl.is_unique)
                l_index_mono = bool(idxl.is_monotonic_increasing)
                n_dates_l = int(len(idxl.unique()))
            else:
                l_index_unique = False
                l_index_mono = False
                n_dates_l = int(len(l_df))
    else:
        l_index_unique = False
        l_index_mono = False
        n_dates_l = 0

    rows_idx.append(
        {
            "mode": mode,
            "weights_index_unique": w_index_unique,
            "weights_index_monotonic": w_index_mono,
            "loadings_index_unique": l_index_unique,
            "loadings_index_monotonic": l_index_mono,
            "n_dates_weights": n_dates_w,
            "n_dates_loadings": n_dates_l,
            "first_date_weights": first_w,
            "last_date_weights": last_w,
        }
    )

show_table(pd.DataFrame(rows_idx))
+-----------+------------------------+---------------------------+-------------------------+----------------------------+-------------------+--------------------+----------------------+---------------------+
| mode      | weights_index_unique   | weights_index_monotonic   | loadings_index_unique   | loadings_index_monotonic   |   n_dates_weights |   n_dates_loadings | first_date_weights   | last_date_weights   |
|-----------+------------------------+---------------------------+-------------------------+----------------------------+-------------------+--------------------+----------------------+---------------------|
| expanding | True                   | True                      | False                   | True                       |              8760 |               8760 | 1991-01-07 00:00:00  | 2026-01-16 00:00:00 |
| rolling   | True                   | True                      | False                   | True                       |              8256 |               8256 | 1993-01-12 00:00:00  | 2026-01-16 00:00:00 |
+-----------+------------------------+---------------------------+-------------------------+----------------------------+-------------------+--------------------+----------------------+---------------------+
Show code
# 5.X.2 Explicit causality spot check
# Re-derive expected daily weights from the refit-dated weights and verify that
# the persisted daily panel matches, and that each sampled trade date uses a
# refit dated at or before the previous trade date.
rows_caus = []
if "duration_scaled_return_proxy" not in globals():
    raise NameError("duration_scaled_return_proxy is required for causality checks; run Section 5.1 first.")

trade_index = duration_scaled_return_proxy.index
for mode in modes:
    w_refit_path = derived_dir / f"pca_weights_refit_{mode}.parquet"
    w_daily_path = derived_dir / f"pca_weights_daily_{mode}.parquet"
    if not w_refit_path.exists() or not w_daily_path.exists():
        continue

    # Rebuild the daily panel through the same causal-shift path used in 5.8.
    w_refit = pd.read_parquet(w_refit_path)
    w_refit_wide = w_refit.pivot(index="refit_date", columns="tenor", values="weight").sort_index()
    w_refit_wide = w_refit_wide.reindex(columns=tenors).fillna(0.0)
    expected_daily, refit_used = apply_causality_shift(
        w_refit_wide, trade_index, f"{mode}_weights_check"
    )

    w_daily = pd.read_parquet(w_daily_path)
    if not isinstance(w_daily.index, pd.DatetimeIndex) and "trade_date" in w_daily.columns:
        w_daily.index = pd.to_datetime(w_daily["trade_date"], errors="coerce")

    idx_common = expected_daily.index.intersection(w_daily.index)
    if len(idx_common) == 0:
        continue

    # Sample dates across the trade calendar.
    # Position 0 is dropped (the first date has no previous trade date); for
    # very short calendars the positions can coincide, yielding fewer samples.
    sample_pos = [0, int(len(idx_common) * 0.25), int(len(idx_common) * 0.5), int(len(idx_common) * 0.75), len(idx_common) - 1]
    sample_dates = [idx_common[i] for i in sample_pos if i > 0]
    for d in sample_dates:
        pos = trade_index.get_loc(d)
        if pos == 0:
            continue
        prev_date = trade_index[pos - 1]
        refit_date_used = refit_used.loc[d] if d in refit_used.index else pd.NaT
        # Exact float equality is intentional: both panels should come from the
        # same computation path, so any bitwise difference flags a pipeline change.
        weights_match = (
            w_daily.loc[d, tenors].fillna(0.0).values
            == expected_daily.loc[d, tenors].fillna(0.0).values
        ).all()
        rows_caus.append(
            {
                "mode": mode,
                "trade_date": d,
                "prev_trade_date": prev_date,
                "refit_date_used": refit_date_used,
                "refit_date_leq_prev_trade": bool(pd.notna(refit_date_used) and refit_date_used <= prev_date),
                "refit_date_is_trade_date": bool(pd.notna(refit_date_used) and refit_date_used == d),
                "weights_match_expected": bool(weights_match),
            }
        )

show_table(pd.DataFrame(rows_caus))
+-----------+---------------------+---------------------+---------------------+-----------------------------+----------------------------+--------------------------+
| mode      | trade_date          | prev_trade_date     | refit_date_used     | refit_date_leq_prev_trade   | refit_date_is_trade_date   | weights_match_expected   |
|-----------+---------------------+---------------------+---------------------+-----------------------------+----------------------------+--------------------------|
| expanding | 1999-10-05 00:00:00 | 1999-10-04 00:00:00 | 1999-09-24 00:00:00 | True                        | False                      | True                     |
| expanding | 2008-07-09 00:00:00 | 2008-07-08 00:00:00 | 2008-06-19 00:00:00 | True                        | False                      | True                     |
| expanding | 2017-04-13 00:00:00 | 2017-04-12 00:00:00 | 2017-03-17 00:00:00 | True                        | False                      | True                     |
| expanding | 2026-01-16 00:00:00 | 2026-01-15 00:00:00 | 2026-01-13 00:00:00 | True                        | False                      | True                     |
| rolling   | 2001-04-09 00:00:00 | 2001-04-06 00:00:00 | 2001-03-29 00:00:00 | True                        | False                      | True                     |
| rolling   | 2009-07-16 00:00:00 | 2009-07-15 00:00:00 | 2009-06-26 00:00:00 | True                        | False                      | True                     |
| rolling   | 2017-10-13 00:00:00 | 2017-10-12 00:00:00 | 2017-09-15 00:00:00 | True                        | False                      | True                     |
| rolling   | 2026-01-16 00:00:00 | 2026-01-15 00:00:00 | 2026-01-13 00:00:00 | True                        | False                      | True                     |
+-----------+---------------------+---------------------+---------------------+-----------------------------+----------------------------+--------------------------+
Show code
# 5.X.3 Constraint residuals check
# Verify the butterfly weight constraints on sampled dates:
# w.PC1 ~ 0, w.PC2 ~ 0, and w.PC3 ~ 1 (within tol), per the within_tol test below.
tol = 1e-6
rows_res = []
rows_res_summary = []
for mode in modes:
    w_daily_path = derived_dir / f"pca_weights_daily_{mode}.parquet"
    l_daily_path = derived_dir / f"pca_loadings_daily_{mode}.parquet"
    if not w_daily_path.exists() or not l_daily_path.exists():
        continue
    w_daily = pd.read_parquet(w_daily_path)
    l_daily = pd.read_parquet(l_daily_path)
    # normalize loadings long to wide per pc
    if "trade_date" in l_daily.columns:
        l_daily["trade_date"] = pd.to_datetime(l_daily["trade_date"])
        pcs = {pc: l_daily.loc[l_daily["pc"] == pc].pivot(index="trade_date", columns="tenor", values="loading") for pc in [1, 2, 3]}
    else:
        # try wide format multiindex
        # NOTE(review): this fallback assumes PC number is level 0 of a
        # MultiIndex; it is not exercised by the visible outputs — confirm the
        # wide layout is ever produced.
        pcs = {}
        for pc in [1, 2, 3]:
            sub = l_daily.xs(pc, level=0, drop_level=False) if hasattr(l_daily.index, "levels") else pd.DataFrame()
            pcs[pc] = sub

    # select five dates across sample
    dates_all = w_daily.index.unique()
    if len(dates_all) == 0:
        continue
    idxs = [0, int(len(dates_all) * 0.25), int(len(dates_all) * 0.5), int(len(dates_all) * 0.75), len(dates_all) - 1]
    sel_dates = [dates_all[i] for i in idxs]
    for d in sel_dates:
        if d not in w_daily.index:
            continue
        w_vec = w_daily.loc[d, tenors].astype("float64")
        # Missing PC loadings on a date yield NaN dot products below.
        p1 = pcs[1].loc[d, tenors].astype("float64") if d in pcs[1].index else None
        p2 = pcs[2].loc[d, tenors].astype("float64") if d in pcs[2].index else None
        p3 = pcs[3].loc[d, tenors].astype("float64") if d in pcs[3].index else None
        pc1_dot = float(np.dot(p1.values, w_vec.values)) if p1 is not None else np.nan
        pc2_dot = float(np.dot(p2.values, w_vec.values)) if p2 is not None else np.nan
        pc3_dot = float(np.dot(p3.values, w_vec.values)) if p3 is not None else np.nan
        rows_res.append(
            {
                "mode": mode,
                "trade_date": d,
                "pc1_dot_w": pc1_dot,
                "pc2_dot_w": pc2_dot,
                "pc3_dot_w": pc3_dot,
                # NaN dots fail the test (comparisons with NaN are False).
                "within_tol": abs(pc1_dot) <= tol and abs(pc2_dot) <= tol and abs(pc3_dot - 1.0) <= tol,
            }
        )

    # Per-mode worst-case absolute residuals across the sampled dates.
    if rows_res:
        df_res = pd.DataFrame([r for r in rows_res if r["mode"] == mode])
        abscols = ["pc1_dot_w", "pc2_dot_w", "pc3_dot_w"]
        max_abs = {c: float(df_res[c].abs().max()) for c in abscols}
        rows_res_summary.append({"mode": mode, **max_abs})

if rows_res:
    show_table(pd.DataFrame(rows_res))
    show_table(pd.DataFrame(rows_res_summary))
+-----------+---------------------+--------------+--------------+-------------+--------------+
| mode      | trade_date          |    pc1_dot_w |    pc2_dot_w |   pc3_dot_w | within_tol   |
|-----------+---------------------+--------------+--------------+-------------+--------------|
| expanding | 1991-01-07 00:00:00 |  0           |  2.22045e-16 |           1 | True         |
| expanding | 1999-10-05 00:00:00 | -1.11022e-16 |  0           |           1 | True         |
| expanding | 2008-07-09 00:00:00 |  1.11022e-16 |  0           |           1 | True         |
| expanding | 2017-04-13 00:00:00 |  1.11022e-16 |  0           |           1 | True         |
| expanding | 2026-01-16 00:00:00 | -5.55112e-17 |  0           |           1 | True         |
| rolling   | 1993-01-12 00:00:00 | -1.11022e-16 |  0           |           1 | True         |
| rolling   | 2001-04-09 00:00:00 |  0           |  4.44089e-16 |           1 | True         |
| rolling   | 2009-07-16 00:00:00 | -5.55112e-17 |  0           |           1 | True         |
| rolling   | 2017-10-13 00:00:00 |  1.11022e-16 | -2.22045e-16 |           1 | True         |
| rolling   | 2026-01-16 00:00:00 | -1.11022e-16 |  0           |           1 | True         |
+-----------+---------------------+--------------+--------------+-------------+--------------+
+-----------+-------------+-------------+-------------+
| mode      |   pc1_dot_w |   pc2_dot_w |   pc3_dot_w |
|-----------+-------------+-------------+-------------|
| expanding | 1.11022e-16 | 2.22045e-16 |           1 |
| rolling   | 1.11022e-16 | 4.44089e-16 |           1 |
+-----------+-------------+-------------+-------------+
Show code
# 5.X.4 Component stability quick check
from math import sqrt
def cosine_sim(a, b):
    """Cosine similarity of two vectors; NaN when either has zero norm."""
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a > 0 and norm_b > 0:
        return float(np.dot(a, b) / (norm_a * norm_b))
    return np.nan

# Refit-to-refit cosine similarity of the first three PC loading vectors, to
# flag unstable or rotating components between consecutive refits.
# NOTE(review): rows_sim_all and summary_sim are never populated in this cell —
# dead unless appended to further down the notebook; confirm.
rows_sim_all = []
summary_sim = []
for mode in modes:
    loadings_path = derived_dir / f"pca_loadings_{mode}.parquet"
    if not loadings_path.exists():
        continue
    ldf = pd.read_parquet(loadings_path)
    refits = sorted(ldf["refit_date"].unique())
    # Tenor order comes from the first refit, sorted by the global tenor rank;
    # unknown tenors sort last via the 10_000 sentinel.
    ten = sorted(ldf[ldf["refit_date"] == refits[0]]["tenor"].unique(), key=lambda x: order_rank.get(x, 10_000))
    sim_rows = []
    for i in range(1, len(refits)):
        t0 = refits[i - 1]
        t1 = refits[i]
        for pc in [1, 2, 3]:
            # Missing tenors on either side contribute 0 to the dot product.
            v0 = (
                ldf.loc[(ldf["refit_date"] == t0) & (ldf["pc"] == pc)].set_index("tenor")["loading"].reindex(ten).fillna(0).values
            )
            v1 = (
                ldf.loc[(ldf["refit_date"] == t1) & (ldf["pc"] == pc)].set_index("tenor")["loading"].reindex(ten).fillna(0).values
            )
            sim = cosine_sim(v0, v1)
            sim_rows.append({"mode": mode, "refit_date": t1, "pc": pc, "similarity": sim})

    sim_df = pd.DataFrame(sim_rows)
    if sim_df.empty:
        continue
    # Ten least-similar transitions; the pivot leaves NaN where a (date, pc)
    # pair is not among the worst ten, as seen in the rendered tables.
    worst = sim_df.sort_values("similarity").head(10)
    show_table(worst.pivot(index="refit_date", columns="pc", values="similarity").reset_index())
    summary = sim_df.groupby("pc").similarity.agg([lambda s: float(s.quantile(0.05)), "median", "min"]).reset_index()
    summary.columns = ["pc", "p05_similarity", "median_similarity", "min_similarity"]
    show_table(summary)
+---------------------+----------+
| refit_date          |        3 |
|---------------------+----------|
| 1991-02-05 00:00:00 | 0.997319 |
| 1991-03-07 00:00:00 | 0.997771 |
| 1991-05-07 00:00:00 | 0.998145 |
| 1991-06-06 00:00:00 | 0.997709 |
| 1991-08-06 00:00:00 | 0.998246 |
| 1991-11-05 00:00:00 | 0.996632 |
| 1992-02-07 00:00:00 | 0.996442 |
| 1992-07-09 00:00:00 | 0.998613 |
| 1993-05-12 00:00:00 | 0.998601 |
| 2009-02-26 00:00:00 | 0.995937 |
+---------------------+----------+
+------+------------------+---------------------+------------------+
|   pc |   p05_similarity |   median_similarity |   min_similarity |
|------+------------------+---------------------+------------------|
|    1 |         0.999996 |            1        |         0.999969 |
|    2 |         0.99995  |            0.999999 |         0.998634 |
|    3 |         0.999577 |            0.999998 |         0.995937 |
+------+------------------+---------------------+------------------+
+---------------------+------------+----------+
| refit_date          |          2 |        3 |
|---------------------+------------+----------|
| 1998-10-23 00:00:00 |   0.992945 | 0.988612 |
| 2008-06-19 00:00:00 | nan        | 0.987744 |
| 2008-10-20 00:00:00 | nan        | 0.961954 |
| 2008-12-23 00:00:00 | nan        | 0.982705 |
| 2009-02-26 00:00:00 | nan        | 0.891205 |
| 2011-10-31 00:00:00 | nan        | 0.989354 |
| 2012-03-05 00:00:00 | nan        | 0.964239 |
| 2020-03-26 00:00:00 | nan        | 0.989548 |
| 2023-04-04 00:00:00 | nan        | 0.985814 |
+---------------------+------------+----------+
+------+------------------+---------------------+------------------+
|   pc |   p05_similarity |   median_similarity |   min_similarity |
|------+------------------+---------------------+------------------|
|    1 |         0.999963 |            0.999997 |         0.999145 |
|    2 |         0.999424 |            0.999957 |         0.992945 |
|    3 |         0.996619 |            0.99984  |         0.891205 |
+------+------------------+---------------------+------------------+
Show code
# 5.X.5 Weight magnitude and leverage diagnostics
# Summarize how large the daily butterfly weights get (per-leg max and L1
# leverage) and display the five most-levered dates for each mode.
rows_wmag = []
for mode in modes:
    w_daily_path = derived_dir / f"pca_weights_daily_{mode}.parquet"
    if not w_daily_path.exists():
        continue
    w_daily = pd.read_parquet(w_daily_path)
    if not isinstance(w_daily.index, pd.DatetimeIndex) and "trade_date" in w_daily.columns:
        # Promote trade_date to the index AND drop the source column: leaving a
        # datetime64 column in place would break the numeric .abs() calls below.
        w_daily.index = pd.to_datetime(w_daily["trade_date"])
        w_daily = w_daily.drop(columns=["trade_date"])
    # Per-date diagnostics: largest single-leg weight and total L1 leverage.
    max_abs = w_daily.abs().max(axis=1)
    l1 = w_daily.abs().sum(axis=1)
    rows_wmag.append({
        "mode": mode,
        "max_abs_weight_p95": float(max_abs.quantile(0.95)),
        "max_abs_weight_max": float(max_abs.max()),
        "l1_weight_p95": float(l1.quantile(0.95)),
        "l1_weight_max": float(l1.max()),
    })
    # Show the full weight vectors on the five highest-leverage dates.
    top5 = l1.sort_values(ascending=False).head(5)
    if len(top5):
        top_dates = top5.index.tolist()
        top_weights = w_daily.loc[top_dates].reset_index()
        show_table(top_weights)

show_table(pd.DataFrame(rows_wmag))
+---------------------+--------+--------+--------+---------+--------+----------+--------+---------+
| date                |   3_mo |   6_mo |   1_yr |    2_yr |   3_yr |     5_yr |   7_yr |   10_yr |
|---------------------+--------+--------+--------+---------+--------+----------+--------+---------|
| 1996-02-08 00:00:00 |      0 |      0 |      0 | 7.48584 |      0 | -5.38757 |      0 | 1.55008 |
| 1996-01-23 00:00:00 |      0 |      0 |      0 | 7.48584 |      0 | -5.38757 |      0 | 1.55008 |
| 1996-01-25 00:00:00 |      0 |      0 |      0 | 7.48584 |      0 | -5.38757 |      0 | 1.55008 |
| 1996-01-22 00:00:00 |      0 |      0 |      0 | 7.48584 |      0 | -5.38757 |      0 | 1.55008 |
| 1996-01-26 00:00:00 |      0 |      0 |      0 | 7.48584 |      0 | -5.38757 |      0 | 1.55008 |
+---------------------+--------+--------+--------+---------+--------+----------+--------+---------+
+---------------------+--------+--------+--------+---------+--------+----------+--------+---------+
| date                |   3_mo |   6_mo |   1_yr |    2_yr |   3_yr |     5_yr |   7_yr |   10_yr |
|---------------------+--------+--------+--------+---------+--------+----------+--------+---------|
| 1996-12-04 00:00:00 |      0 |      0 |      0 | 10.5176 |      0 | -7.09464 |      0 | 1.88608 |
| 1996-09-12 00:00:00 |      0 |      0 |      0 | 10.5176 |      0 | -7.09464 |      0 | 1.88608 |
| 1996-09-26 00:00:00 |      0 |      0 |      0 | 10.5176 |      0 | -7.09464 |      0 | 1.88608 |
| 1996-09-25 00:00:00 |      0 |      0 |      0 | 10.5176 |      0 | -7.09464 |      0 | 1.88608 |
| 1996-09-24 00:00:00 |      0 |      0 |      0 | 10.5176 |      0 | -7.09464 |      0 | 1.88608 |
+---------------------+--------+--------+--------+---------+--------+----------+--------+---------+
+-----------+----------------------+----------------------+-----------------+-----------------+
| mode      |   max_abs_weight_p95 |   max_abs_weight_max |   l1_weight_p95 |   l1_weight_max |
|-----------+----------------------+----------------------+-----------------+-----------------|
| expanding |               6.3642 |              7.48584 |         11.9479 |         14.4235 |
| rolling   |              10.5176 |             10.5176  |         19.4983 |         19.4983 |
+-----------+----------------------+----------------------+-----------------+-----------------+
Show code
# 5.X.6 Reload check and file existence
# Confirm every Section 5 artifact was written to disk and record its size.
artifact_templates = [
    "pca_weights_daily_{m}.parquet",
    "pca_loadings_daily_{m}.parquet",
    "pca_turnover_{m}.parquet",
    "pca_weights_refit_{m}.parquet",
    "pca_loadings_{m}.parquet",
]
files = []
for mode in modes:
    for template in artifact_templates:
        path = derived_dir / template.format(m=mode)
        size_kb = float(path.stat().st_size / 1024) if path.exists() else None
        files.append({"file": str(path), "exists": path.exists(), "size_kb": size_kb})

show_table(pd.DataFrame(files))
+---------------------------------------------------+----------+-----------+
| file                                              | exists   |   size_kb |
|---------------------------------------------------+----------+-----------|
| data/derived/pca_weights_daily_expanding.parquet  | True     |   97.249  |
| data/derived/pca_loadings_daily_expanding.parquet | True     |  210.892  |
| data/derived/pca_turnover_expanding.parquet       | True     |   23.1758 |
| data/derived/pca_weights_refit_expanding.parquet  | True     |   20.5117 |
| data/derived/pca_loadings_expanding.parquet       | True     |  103.218  |
| data/derived/pca_weights_daily_rolling.parquet    | True     |   90.8467 |
| data/derived/pca_loadings_daily_rolling.parquet   | True     |  198.935  |
| data/derived/pca_turnover_rolling.parquet         | True     |   21.7988 |
| data/derived/pca_weights_refit_rolling.parquet    | True     |   18.3496 |
| data/derived/pca_loadings_rolling.parquet         | True     |   97.458  |
+---------------------------------------------------+----------+-----------+

6. Residual construction and signal generation

Show code
# 6.1 Load spec, tenors, yields, and overlap calendar

# Load backtest spec (tenors, sample bounds, params) and canonical curve,
# then align dates to the raw curve so later residuals use an overlap calendar.

spec_path = derived_dir / "backtest_spec.json"
if not spec_path.exists():
    raise FileNotFoundError(spec_path)

with open(spec_path, "r") as f:
    backtest_spec = json.load(f)

curve_path = derived_dir / "curve_treasury_par_canonical.parquet"
if not curve_path.exists():
    raise FileNotFoundError(curve_path)

# Normalize the canonical curve to a tz-naive, sorted DatetimeIndex.
df_curve = pd.read_parquet(curve_path)
if "date" in df_curve.columns:
    df_curve["date"] = pd.to_datetime(df_curve["date"], errors="coerce")
    df_curve = df_curve.set_index("date")
df_curve.index = ensure_naive_dates(df_curve.index)
df_curve = df_curve.sort_index()

# Reuse df_raw when an earlier section already loaded it; the bare `df_raw`
# expression is an existence probe that raises NameError when undefined.
try:
    df_raw
    _ = df_raw
except NameError:
    # NOTE(review): rglob order is filesystem-dependent, so candidates[0] is
    # not deterministic when several files match — consider sorting.
    candidates = list(Path("data").rglob("*treasury*par*.parquet"))
    if not candidates:
        raise FileNotFoundError("raw treasury par curve parquet not found")
    df_raw = pd.read_parquet(candidates[0])
    if "date" in df_raw.columns:
        df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
        df_raw = df_raw.set_index("date")
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()

# Restrict to dates present in both the canonical and raw curves, clipped to
# the spec's all-non-null sample bounds.
idx_overlap = df_curve.index.intersection(df_raw.index)
tenors = list(backtest_spec["tenors"])
sample_start = pd.to_datetime(backtest_spec["sample_start_all_non_null"])
sample_end = pd.to_datetime(backtest_spec["sample_end_all_non_null"])
idx = idx_overlap[(idx_overlap >= sample_start) & (idx_overlap <= sample_end)]

df_yields = df_curve.loc[idx, tenors].copy()
valid_data = df_yields.notna().all(axis=1)

# Butterfly legs may be stored as a JSON string or as a list in the spec.
params = backtest_spec["parameter_defaults"]
butterfly_legs_raw = params.get("butterfly_legs", '["2_yr","5_yr","10_yr"]')
if isinstance(butterfly_legs_raw, str):
    butterfly_legs = json.loads(butterfly_legs_raw)
else:
    butterfly_legs = list(butterfly_legs_raw)
if len(butterfly_legs) != 3 or len(set(butterfly_legs)) != 3:
    raise ValueError(f"butterfly_legs must contain exactly 3 distinct tenors, got {butterfly_legs!r}")
unknown_legs = [t for t in butterfly_legs if t not in tenors]
if unknown_legs:
    raise ValueError(f"butterfly_legs contains tenors not in tenors list: {unknown_legs!r}. tenors={tenors!r}")

# Duration inputs: static assumptions table plus the per-date duration panel.
durations_path = Path(backtest_spec.get("durations_table_path", derived_dir / "duration_assumptions.parquet"))
if not durations_path.exists():
    raise FileNotFoundError(durations_path)

durations = pd.read_parquet(durations_path)
duration_panel_path = derived_dir / "duration_panel.parquet"
if not duration_panel_path.exists():
    raise FileNotFoundError(duration_panel_path)
duration_panel = pd.read_parquet(duration_panel_path)
duration_panel.index = ensure_naive_dates(duration_panel.index)
duration_panel = duration_panel.reindex(columns=tenors)

# Convert yield changes to a duration scaled yield change return proxy.
dy_decimal, duration_scaled_return_proxy = build_dv01_returns(df_yields, duration_panel)

summary_6_1 = pd.DataFrame(
    [
        {
            "start_date": df_yields.index.min(),
            "end_date": df_yields.index.max(),
            "n_dates": int(len(df_yields)),
            "n_valid_dates": int(valid_data.sum()),
            "share_valid": float(valid_data.mean()) if len(valid_data) else np.nan,
        }
    ]
)
show_table(summary_6_1)
+---------------------+---------------------+-----------+-----------------+---------------+
| start_date          | end_date            |   n_dates |   n_valid_dates |   share_valid |
|---------------------+---------------------+-----------+-----------------+---------------|
| 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |      9017 |            9013 |      0.999556 |
+---------------------+---------------------+-----------+-----------------+---------------+
Show code
# 6.3 Compute residual and z score for expanding weights
weights_exp_path = derived_dir / "pca_weights_daily_expanding.parquet"
if not weights_exp_path.exists():
    raise FileNotFoundError(weights_exp_path)

df_w = pd.read_parquet(weights_exp_path)
if not isinstance(df_w.index, pd.DatetimeIndex) and "trade_date" in df_w.columns:
    df_w.index = pd.to_datetime(df_w["trade_date"], errors="coerce")

# Align daily weights to the return-proxy trade calendar.
df_w.index = ensure_naive_dates(df_w.index)
r_proxy = duration_scaled_return_proxy
trade_index = r_proxy.index
df_w = df_w.reindex(trade_index)

# A date is usable only when all three butterfly legs have both a weight and a
# return-proxy observation.
valid_weights = df_w[butterfly_legs].notna().all(axis=1)
valid_returns = r_proxy[butterfly_legs].notna().all(axis=1)
valid_all = valid_weights & valid_returns

all_nan_legs = r_proxy[butterfly_legs].isna().all(axis=1)
if all_nan_legs.any():
    sample = all_nan_legs[all_nan_legs].index[:5].tolist()
    warnings.warn(
        f"Return proxy has all-NaN butterfly legs on {int(all_nan_legs.sum())} dates "
        f"(sample={sample}); residuals will be NaN on these dates."
    )

# Residual is computed on the return-proxy calendar; z-score uses trailing stats,
# while the trading logic uses the prior-day z-score to avoid look-ahead.
# min_count=1 keeps a date NaN only when every product is NaN; the .where then
# also blanks dates that fail the validity mask above.
residual_ret = (df_w * r_proxy).sum(axis=1, min_count=1)
residual_ret = residual_ret.where(valid_all, np.nan)
residual_level = residual_ret.cumsum()
residual_level.name = "residual"

# Trailing z-score of the residual level over a fixed observation window;
# min_periods=Wz means the first Wz-1 dates carry no z-score.
Wz = int(backtest_spec["parameter_defaults"]["z_window_obs"])
mean_r = residual_level.rolling(Wz, min_periods=Wz).mean()
std_r = residual_level.rolling(Wz, min_periods=Wz).std(ddof=0)

z = (residual_level - mean_r) / std_r
# Mask dates with zero or undefined dispersion to avoid inf/degenerate z.
z = z.where(std_r > 0, np.nan)
z.name = "zscore"

z_finite = z.replace([np.inf, -np.inf], np.nan).dropna()
z_percentiles = (
    z_finite.quantile([0.01, 0.05, 0.50, 0.95, 0.99]).to_dict() if len(z_finite) else {}
)

summary_6_3 = pd.DataFrame(
    [
        {
            "n_residual_non_null": int(residual_level.notna().sum()),
            "first_valid_residual_date": residual_level.first_valid_index(),
            "first_valid_z_date": z.first_valid_index(),
            "z_p01": z_percentiles.get(0.01, np.nan),
            "z_p05": z_percentiles.get(0.05, np.nan),
            "z_p50": z_percentiles.get(0.50, np.nan),
            "z_p95": z_percentiles.get(0.95, np.nan),
            "z_p99": z_percentiles.get(0.99, np.nan),
        }
    ]
)
show_table(summary_6_3)
Show code
# 6.4 Signal flags and save artifacts for expanding
entry_z = float(backtest_spec["parameter_defaults"]["entry_z"])
exit_z = float(backtest_spec["parameter_defaults"]["exit_z"])
# Convert z-score to directional signal: +1 long residual, -1 short residual.
signal_raw = np.where(z <= -entry_z, 1, np.where(z >= entry_z, -1, 0))
# NaN z-scores already compare False above, but make the flat state explicit.
signal_raw = np.where(z.isna(), 0, signal_raw)
signal_raw = pd.Series(signal_raw, index=z.index)

flags_index = z.index

# Per-date flag panel consumed by the Section 7 state machine.
flags_expanding = pd.DataFrame(
    {
        "in_sample": (flags_index >= sample_start) & (flags_index <= sample_end),
        "valid_data": valid_all.reindex(flags_index),
        "residual": residual_level.reindex(flags_index),
        "zscore": z.reindex(flags_index),
        "signal_raw": signal_raw.reindex(flags_index),
    },
    index=flags_index,
)

# Persist residuals, z-scores, and per-date flags for Section 7 backtests.
pd.DataFrame(residual_level).to_parquet(
    derived_dir / "residual_expanding.parquet", engine="pyarrow", index=True
)
pd.DataFrame(z).to_parquet(derived_dir / "zscore_expanding.parquet", engine="pyarrow", index=True)
flags_expanding.to_parquet(
    derived_dir / "signal_flags_expanding.parquet", engine="pyarrow", index=True
)

# Raw signal day counts (before any holding-period/state logic) as a sanity check.
counts_expanding = pd.DataFrame(
    [
        {
            "n_days_long_signal": int((signal_raw == 1).sum()),
            "n_days_short_signal": int((signal_raw == -1).sum()),
            "n_days_zero": int((signal_raw == 0).sum()),
            "n_days_nan_z": int(z.isna().sum()),
        }
    ]
)
show_table(counts_expanding)
+----------------------+-----------------------+---------------+----------------+
|   n_days_long_signal |   n_days_short_signal |   n_days_zero |   n_days_nan_z |
|----------------------+-----------------------+---------------+----------------|
|                  723 |                   679 |          7606 |            503 |
+----------------------+-----------------------+---------------+----------------+
Show code
# 6.5 Plot residual and z score (expanding)
%matplotlib inline

section6_dir = Path("outputs/section_06")
section6_dir.mkdir(parents=True, exist_ok=True)

# Cumulative residual level over time.
fig, ax = plt.subplots()
ax.plot(residual_level.index, residual_level.values)
ax.set_title("Expanding residual")
ax.set_xlabel("date")
ax.set_ylabel("cumulative residual return (proxy)")
fig.autofmt_xdate()
fig.savefig(section6_dir / "fig_residual_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

# Z-score with horizontal entry/exit threshold lines for visual context.
fig, ax = plt.subplots()
ax.plot(z.index, z.values)
ax.axhline(entry_z)
ax.axhline(-entry_z)
ax.axhline(exit_z)
ax.axhline(-exit_z)
ax.set_title("Expanding z score")
ax.set_xlabel("date")
ax.set_ylabel("zscore")
fig.autofmt_xdate()
fig.savefig(section6_dir / "fig_zscore_expanding.png", dpi=150, bbox_inches="tight")
plt.show()
Show code
# 6.6 Repeat for rolling mode if weights file exists
weights_roll_path = derived_dir / "pca_weights_daily_rolling.parquet"
if weights_roll_path.exists():
    # Rolling mode mirrors expanding, just using rolling PCA weights.
    df_w_roll = pd.read_parquet(weights_roll_path)
    if not isinstance(df_w_roll.index, pd.DatetimeIndex) and "trade_date" in df_w_roll.columns:
        df_w_roll.index = pd.to_datetime(df_w_roll["trade_date"], errors="coerce")

    df_w_roll.index = ensure_naive_dates(df_w_roll.index)
    r_proxy_roll = duration_scaled_return_proxy
    trade_index_roll = r_proxy_roll.index
    # Align weights to the return-proxy calendar, as in 6.3.
    df_w_roll = df_w_roll.reindex(trade_index_roll)
    valid_weights_roll = df_w_roll[butterfly_legs].notna().all(axis=1)
    valid_returns_roll = r_proxy_roll[butterfly_legs].notna().all(axis=1)
    valid_all_roll = valid_weights_roll & valid_returns_roll

    # Warn on dates where every leg return is missing (NaN residuals result).
    all_nan_legs_roll = r_proxy_roll[butterfly_legs].isna().all(axis=1)
    if all_nan_legs_roll.any():
        sample_roll = all_nan_legs_roll[all_nan_legs_roll].index[:5].tolist()
        warnings.warn(
            f"[rolling] Return proxy has all-NaN butterfly legs on {int(all_nan_legs_roll.sum())} dates "
            f"(sample={sample_roll}); residuals will be NaN on these dates."
        )

    # Residual level and trailing z-score, same construction as 6.3.
    residual_ret_roll = (df_w_roll * r_proxy_roll).sum(axis=1, min_count=1)
    residual_ret_roll = residual_ret_roll.where(valid_all_roll, np.nan)
    residual_level_roll = residual_ret_roll.cumsum()
    residual_level_roll.name = "residual"

    mean_r_roll = residual_level_roll.rolling(Wz, min_periods=Wz).mean()
    std_r_roll = residual_level_roll.rolling(Wz, min_periods=Wz).std(ddof=0)

    z_roll = (residual_level_roll - mean_r_roll) / std_r_roll
    # Guard against zero/NaN std to avoid +/-inf z-scores.
    z_roll = z_roll.where(std_r_roll > 0, np.nan)
    z_roll.name = "zscore"

    # Raw directional signal; NaN z maps to flat.
    signal_raw_roll = np.where(z_roll <= -entry_z, 1, np.where(z_roll >= entry_z, -1, 0))
    signal_raw_roll = np.where(z_roll.isna(), 0, signal_raw_roll)
    signal_raw_roll = pd.Series(signal_raw_roll, index=z_roll.index)

    flags_index_roll = z_roll.index

    # Per-date flag panel, mirroring flags_expanding.
    flags_rolling = pd.DataFrame(
        {
            "in_sample": (flags_index_roll >= sample_start) & (flags_index_roll <= sample_end),
            "valid_data": valid_all_roll.reindex(flags_index_roll),
            "residual": residual_level_roll.reindex(flags_index_roll),
            "zscore": z_roll.reindex(flags_index_roll),
            "signal_raw": signal_raw_roll.reindex(flags_index_roll),
        },
        index=flags_index_roll,
    )

    # Persist rolling-mode artifacts for Section 7.
    pd.DataFrame(residual_level_roll).to_parquet(
        derived_dir / "residual_rolling.parquet", engine="pyarrow", index=True
    )
    pd.DataFrame(z_roll).to_parquet(derived_dir / "zscore_rolling.parquet", engine="pyarrow", index=True)
    flags_rolling.to_parquet(
        derived_dir / "signal_flags_rolling.parquet", engine="pyarrow", index=True
    )

    # Rolling residual level plot.
    fig, ax = plt.subplots()
    ax.plot(residual_level_roll.index, residual_level_roll.values)
    ax.set_title("Rolling residual")
    ax.set_xlabel("date")
    ax.set_ylabel("cumulative residual return (proxy)")
    fig.autofmt_xdate()
    fig.savefig(section6_dir / "fig_residual_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()

    # Rolling z-score plot with entry/exit thresholds.
    fig, ax = plt.subplots()
    ax.plot(z_roll.index, z_roll.values)
    ax.axhline(entry_z)
    ax.axhline(-entry_z)
    ax.axhline(exit_z)
    ax.axhline(-exit_z)
    ax.set_title("Rolling z score")
    ax.set_xlabel("date")
    ax.set_ylabel("zscore")
    fig.autofmt_xdate()
    fig.savefig(section6_dir / "fig_zscore_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()
else:
    show_table(pd.DataFrame([{"rolling_outputs": "not present", "action": "skipping"}]))

Show code
# 6.6b Mean reversion diagnostics
def compute_mean_reversion_tests(series: pd.Series, variant: str) -> dict:
    """Run ADF/KPSS stationarity tests and an AR(1) half-life fit on a series.

    Parameters
    ----------
    series : level series to test (NaNs dropped first).
    variant : label carried through to the output row (e.g. "expanding").

    Returns
    -------
    dict with test statistics/p-values, the AR(1) coefficient, and the implied
    half-life in observations. NaN placeholders are used wherever a test fails
    or the sample has fewer than 5 observations.
    """
    clean = series.dropna()

    # Start from an all-NaN row; fill in whatever succeeds below.
    row = {
        "variant": variant,
        "sample_start": None,
        "sample_end": None,
        "n_obs": int(len(clean)),
        "adf_stat": np.nan,
        "adf_p": np.nan,
        "kpss_stat": np.nan,
        "kpss_p": np.nan,
        "ar1_phi": np.nan,
        "half_life_days": np.nan,
    }
    if len(clean) < 5:
        return row

    row["sample_start"] = clean.index.min()
    row["sample_end"] = clean.index.max()

    # Each test is best-effort: a failure leaves its NaN placeholders in place.
    try:
        stat, pval = adfuller(clean.values, autolag="AIC")[:2]
        row["adf_stat"] = float(stat)
        row["adf_p"] = float(pval)
    except Exception:
        pass
    try:
        stat, pval = kpss(clean.values, regression="c", nlags="auto")[:2]
        row["kpss_stat"] = float(stat)
        row["kpss_p"] = float(pval)
    except Exception:
        pass

    # AR(1): regress level_t on level_{t-1}; half-life = -ln(2)/ln(phi) for 0 < phi < 1.
    try:
        lagged = sm.add_constant(clean.iloc[:-1].values)
        fit = sm.OLS(clean.iloc[1:].values, lagged).fit()
        phi = float(fit.params[1])
        row["ar1_phi"] = phi
        if np.isfinite(phi) and 0 < phi < 1:
            row["half_life_days"] = float(-np.log(2.0) / np.log(phi))
    except Exception:
        pass

    return row

# Run stationarity/half-life diagnostics on each residual variant and persist.
tests_rows = [compute_mean_reversion_tests(residual_level, "expanding")]
if weights_roll_path.exists():
    tests_rows.append(compute_mean_reversion_tests(residual_level_roll, "rolling"))

mean_reversion_tests = pd.DataFrame(tests_rows)
section6_dir = Path("outputs/section_06")
section6_dir.mkdir(parents=True, exist_ok=True)
write_df_csv_and_md(
    mean_reversion_tests,
    section6_dir / "mean_reversion_tests.csv",
    section6_dir / "mean_reversion_tests.md",
)
show_table(mean_reversion_tests)
/var/folders/8b/dng58k4174jcldvgrm6q_mzr0000gn/T/ipykernel_12823/3136655965.py:29: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s.values, regression="c", nlags="auto")
+-----------+---------------------+---------------------+---------+------------+----------+-------------+----------+-----------+------------------+
| variant   | sample_start        | sample_end          |   n_obs |   adf_stat |    adf_p |   kpss_stat |   kpss_p |   ar1_phi |   half_life_days |
|-----------+---------------------+---------------------+---------+------------+----------+-------------+----------+-----------+------------------|
| expanding | 1991-01-07 00:00:00 | 2026-01-16 00:00:00 |    8756 |   -1.95249 | 0.307783 |     6.072   |     0.01 |  0.998698 |          532.194 |
| rolling   | 1993-01-12 00:00:00 | 2026-01-16 00:00:00 |    8252 |   -1.10086 | 0.714728 |     3.15582 |     0.01 |  0.999228 |          897.683 |
+-----------+---------------------+---------------------+---------+------------+----------+-------------+----------+-----------+------------------+
/var/folders/8b/dng58k4174jcldvgrm6q_mzr0000gn/T/ipykernel_12823/3136655965.py:29: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s.values, regression="c", nlags="auto")
Show code
# 6.7 Sanity checks for causality and alignment
def first_non_null(series: pd.Series):
    """Return the index label of the first non-missing value, or None if all are NaN."""
    first_idx = series.first_valid_index()
    return first_idx


rows_sanity = []

# First date with complete weights vs. first residual/z dates. Residual should
# begin with the weights; z begins later because the rolling window requires
# min_periods=Wz full observations.
first_weights_exp = df_w.dropna(how="any").index.min() if len(df_w) else None
first_resid_exp = first_non_null(residual_level)
first_z_exp = first_non_null(z)

rows_sanity.append(
    {
        "mode": "expanding",
        "first_date_weights_non_null": first_weights_exp,
        "first_date_residual_non_null": first_resid_exp,
        "first_date_z_non_null": first_z_exp,
    }
)

# Same check for the rolling variant when its artifacts exist.
if weights_roll_path.exists():
    first_weights_roll = df_w_roll.dropna(how="any").index.min() if len(df_w_roll) else None
    first_resid_roll = first_non_null(residual_level_roll)
    first_z_roll = first_non_null(z_roll)
    rows_sanity.append(
        {
            "mode": "rolling",
            "first_date_weights_non_null": first_weights_roll,
            "first_date_residual_non_null": first_resid_roll,
            "first_date_z_non_null": first_z_roll,
        }
    )

sanity_dates = pd.DataFrame(rows_sanity)
show_table(sanity_dates)

# Spot-check that manual_mean excludes the test date and therefore must match mean_series shifted by 1 at t minus 1.
def causality_check(r_series: pd.Series, mean_series: pd.Series, window: int) -> dict:
    """Spot-check that the rolling mean carries no look-ahead.

    Picks the median date with a defined rolling mean, hand-computes the
    trailing `window`-observation mean that *excludes* that date, and compares
    it to mean_series shifted by one observation. Agreement confirms that
    using the shifted mean at t only consumes data through t-1.
    """
    empty = {"test_date": None, "manual_mean": np.nan, "shifted_mean": np.nan}
    if r_series.notna().sum() == 0:
        return empty
    candidate_dates = mean_series.dropna().index
    if len(candidate_dates) == 0:
        return empty

    test_date = candidate_dates[len(candidate_dates) // 2]
    # Trailing window ending the observation *before* test_date.
    trailing = r_series.loc[:test_date].iloc[-(window + 1) : -1]
    manual = float(trailing.mean()) if trailing.notna().sum() == window else np.nan
    prior = mean_series.shift(1).loc[test_date]
    return {
        "test_date": test_date,
        "manual_mean": manual,
        "shifted_mean": float(prior) if pd.notna(prior) else np.nan,
    }


# Tolerance for float comparison between the manual and vectorized rolling means.
tol = 1e-8
check_exp = causality_check(residual_level, mean_r, Wz)
rows_check = [
    {
        "mode": "expanding",
        "test_date": check_exp["test_date"],
        "manual_mean": check_exp["manual_mean"],
        "shifted_mean": check_exp["shifted_mean"],
        # abs_diff/within_tol only computed when both means are finite.
        "abs_diff": abs(check_exp["manual_mean"] - check_exp["shifted_mean"])
        if np.isfinite(check_exp["manual_mean"]) and np.isfinite(check_exp["shifted_mean"])
        else np.nan,
        "within_tol": bool(
            np.isfinite(check_exp["manual_mean"])
            and np.isfinite(check_exp["shifted_mean"])
            and abs(check_exp["manual_mean"] - check_exp["shifted_mean"]) <= tol
        ),
    }
]

# Same spot-check for the rolling variant when present.
if weights_roll_path.exists():
    check_roll = causality_check(residual_level_roll, mean_r_roll, Wz)
    rows_check.append(
        {
            "mode": "rolling",
            "test_date": check_roll["test_date"],
            "manual_mean": check_roll["manual_mean"],
            "shifted_mean": check_roll["shifted_mean"],
            "abs_diff": abs(check_roll["manual_mean"] - check_roll["shifted_mean"])
            if np.isfinite(check_roll["manual_mean"]) and np.isfinite(check_roll["shifted_mean"])
            else np.nan,
            "within_tol": bool(
                np.isfinite(check_roll["manual_mean"])
                and np.isfinite(check_roll["shifted_mean"])
                and abs(check_roll["manual_mean"] - check_roll["shifted_mean"]) <= tol
            ),
        }
    )

# Persist the causality spot-check alongside the other section outputs.
causality_table = pd.DataFrame(rows_check)
show_table(causality_table)
write_df_csv_and_md(
    causality_table,
    section6_dir / "causality_check.csv",
    section6_dir / "causality_check.md",
)
+-----------+-------------------------------+--------------------------------+-------------------------+
| mode      | first_date_weights_non_null   | first_date_residual_non_null   | first_date_z_non_null   |
|-----------+-------------------------------+--------------------------------+-------------------------|
| expanding | 1991-01-07 00:00:00           | 1991-01-07 00:00:00            | 1992-01-08 00:00:00     |
| rolling   | 1993-01-12 00:00:00           | 1993-01-12 00:00:00            | 1994-01-12 00:00:00     |
+-----------+-------------------------------+--------------------------------+-------------------------+
+-----------+---------------------+---------------+----------------+-------------+--------------+
| mode      | test_date           |   manual_mean |   shifted_mean |    abs_diff | within_tol   |
|-----------+---------------------+---------------+----------------+-------------+--------------|
| expanding | 2009-01-14 00:00:00 |     0.0712762 |      0.0712762 | 0           | True         |
| rolling   | 2010-01-19 00:00:00 |     0.0801659 |      0.0801659 | 1.38778e-17 | True         |
+-----------+---------------------+---------------+----------------+-------------+--------------+

7. Trading rules and portfolio simulation

Show code
# 7.0 Helpers for Section 7
def run_state_machine(
    z_series: pd.Series,
    entry_z: float,
    exit_z: float,
    max_holding: int,
    valid_mask: pd.Series,
) -> pd.DataFrame:
    """Daily position state machine driven by the prior day's z-score.

    States: +1 long residual, -1 short residual, 0 flat. An entry triggers
    when yesterday's z crosses -entry_z (long) or +entry_z (short); an exit
    triggers when it reverts inside the exit band or the holding count
    reaches max_holding. Dates failing valid_mask are forced flat. Using
    z_{t-1} avoids same-day look-ahead.

    Returns a DataFrame indexed like z_series with columns "state" and
    "holding_days".
    """
    z_yesterday = z_series.shift(1)
    states = []
    holdings = []
    state, hold = 0, 0

    for day in z_series.index:
        if not bool(valid_mask.loc[day]):
            # Invalid data: force flat and reset the holding counter.
            state, hold = 0, 0
        else:
            zv = float(z_yesterday.loc[day])
            if state == 0:
                # Flat: look for an entry trigger (NaN compares False -> stay flat).
                if zv <= -entry_z:
                    state, hold = 1, 1
                elif zv >= entry_z:
                    state, hold = -1, 1
                else:
                    state, hold = 0, 0
            elif state == 1:
                # Long: exit on reversion above -exit_z or when holding limit is hit.
                if zv >= -exit_z or hold >= max_holding:
                    state, hold = 0, 0
                else:
                    hold += 1
            else:
                # Short: exit on reversion below exit_z or when holding limit is hit.
                if zv <= exit_z or hold >= max_holding:
                    state, hold = 0, 0
                else:
                    hold += 1
        states.append(state)
        holdings.append(hold)

    return pd.DataFrame({"state": states, "holding_days": holdings}, index=z_series.index)


def build_trade_list(bt_daily: pd.DataFrame) -> pd.DataFrame:
    """Aggregate contiguous non-zero position spans into trade-level records.

    Parameters
    ----------
    bt_daily : DataFrame indexed by date with columns ``position_state``,
        ``zscore``, ``pnl_gross``, ``pnl_net``.

    Returns
    -------
    DataFrame with one row per closed trade: id, side, entry/exit dates and
    z-levels, active holding days, cumulative gross/net PnL, and the max
    adverse/favorable net-PnL excursions along the trade path.

    Notes
    -----
    A position still open on the final date is NOT emitted — a trade is only
    recorded once the state returns to 0. Entry/exit z-levels use the
    prior-day z-score, matching the decision timing of the state machine.
    The PnL slice runs from entry through the flat exit date inclusive, so
    exit-day PnL and costs are captured.
    """
    state = bt_daily["position_state"]
    # Decisions at date t are made on z_{t-1}; report that value at entry/exit.
    z_prev = bt_daily["zscore"].shift(1)

    trades = []
    in_trade = False
    trade_id = 0
    entry_date = None
    entry_side = None

    for t in state.index:
        s = int(state.loc[t])
        if not in_trade and s != 0:
            in_trade = True
            trade_id += 1
            entry_date = t
            entry_side = "long" if s == 1 else "short"
        elif in_trade and s == 0:
            exit_date = t
            trade_slice = bt_daily.loc[entry_date:exit_date]
            active_days = int((trade_slice["position_state"] != 0).sum())
            # Running net-PnL path within the trade for excursion stats.
            cum_path = trade_slice["pnl_net"].cumsum()
            trades.append(
                {
                    "trade_id": trade_id,
                    "side": entry_side,
                    "entry_date": entry_date,
                    "exit_date": exit_date,
                    "entry_z": float(z_prev.loc[entry_date]) if pd.notna(z_prev.loc[entry_date]) else np.nan,
                    "exit_z": float(z_prev.loc[exit_date]) if pd.notna(z_prev.loc[exit_date]) else np.nan,
                    "holding_days": active_days,
                    "cum_pnl_gross": float(trade_slice["pnl_gross"].sum()),
                    "cum_pnl_net": float(trade_slice["pnl_net"].sum()),
                    "max_adverse": float(cum_path.min()) if len(cum_path) else np.nan,
                    "max_favorable": float(cum_path.max()) if len(cum_path) else np.nan,
                }
            )
            in_trade = False
            entry_date = None
            entry_side = None

    return pd.DataFrame(trades)
Show code
# 7.1 Load inputs and construct aligned dv01 style return panel

derived_dir = Path("data/derived")
spec_path = derived_dir / "backtest_spec.json"
if not spec_path.exists():
    raise FileNotFoundError(spec_path)

with open(spec_path, "r") as f:
    backtest_spec = json.load(f)

tenors = list(backtest_spec["tenors"])

durations_path = derived_dir / "duration_assumptions.parquet"
if not durations_path.exists():
    raise FileNotFoundError(durations_path)

durations = pd.read_parquet(durations_path)
duration_panel_path = derived_dir / "duration_panel.parquet"
if not duration_panel_path.exists():
    raise FileNotFoundError(duration_panel_path)
duration_panel = pd.read_parquet(duration_panel_path)
duration_panel.index = ensure_naive_dates(duration_panel.index)
# Restrict/order columns to the spec tenor list.
duration_panel = duration_panel.reindex(columns=tenors)

curve_path = derived_dir / "curve_treasury_par_canonical.parquet"
if not curve_path.exists():
    raise FileNotFoundError(curve_path)

df_curve = pd.read_parquet(curve_path)
if "date" in df_curve.columns:
    df_curve["date"] = pd.to_datetime(df_curve["date"], errors="coerce")
    df_curve = df_curve.set_index("date")
df_curve.index = ensure_naive_dates(df_curve.index)
df_curve = df_curve.sort_index()

# Reuse df_raw if an earlier cell already loaded it; otherwise locate the raw
# treasury par parquet on disk (first glob match wins).
try:
    df_raw
    _ = df_raw
except NameError:
    candidates = list(Path("data").rglob("*treasury*par*.parquet"))
    if not candidates:
        raise FileNotFoundError("raw treasury par curve parquet not found")
    df_raw = pd.read_parquet(candidates[0])
    if "date" in df_raw.columns:
        df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
        df_raw = df_raw.set_index("date")
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()

# Only keep dates present in both canonical and raw curves.
idx_overlap = df_curve.index.intersection(df_raw.index)
df_yields = df_curve.loc[idx_overlap, tenors].copy()
# NOTE(review): build_dv01_returns is defined in an earlier cell (not shown here).
dy_decimal, duration_scaled_return_proxy = build_dv01_returns(df_yields, duration_panel)

# Document trading convention: positions at date t use weights and signals based on data through t minus 1.
timing_table = pd.DataFrame(
    [
        {"item": "signal_used", "value": "zscore shifted 1 observation (use z_{t-1} for decisions at t)"},
        {"item": "weights_used", "value": "weights shifted 1 observation (use w_{t-1} at t)"},
        {"item": "pnl_day_t", "value": "pnl at date t uses dv01_return_t from yields_{t} - yields_{t-1}"},
    ]
)

# Coverage summary of the aligned return panel.
align_table = pd.DataFrame(
    [
        {
            "start": duration_scaled_return_proxy.index.min(),
            "end": duration_scaled_return_proxy.index.max(),
            "n_days": int(len(duration_scaled_return_proxy)),
            "n_tenors": int(duration_scaled_return_proxy.shape[1]),
        }
    ]
)

show_table(timing_table)
show_table(align_table)
+--------------+-----------------------------------------------------------------+
| item         | value                                                           |
|--------------+-----------------------------------------------------------------|
| signal_used  | zscore shifted 1 observation (use z_{t-1} for decisions at t)   |
| weights_used | weights shifted 1 observation (use w_{t-1} at t)                |
| pnl_day_t    | pnl at date t uses dv01_return_t from yields_{t} - yields_{t-1} |
+--------------+-----------------------------------------------------------------+
+---------------------+---------------------+----------+------------+
| start               | end                 |   n_days |   n_tenors |
|---------------------+---------------------+----------+------------|
| 1990-01-03 00:00:00 | 2026-01-16 00:00:00 |     9008 |          8 |
+---------------------+---------------------+----------+------------+
Show code
# 7.2 Load expanding artifacts and build state machine inputs
flags_exp_path = derived_dir / "signal_flags_expanding.parquet"
weights_exp_path = derived_dir / "pca_weights_daily_expanding.parquet"
resid_exp_path = derived_dir / "residual_expanding.parquet"
z_exp_path = derived_dir / "zscore_expanding.parquet"

# Fail fast if any Section 6 artifact is missing.
for p in [flags_exp_path, weights_exp_path, resid_exp_path, z_exp_path]:
    if not p.exists():
        raise FileNotFoundError(p)

flags_exp = pd.read_parquet(flags_exp_path)
weights_exp = pd.read_parquet(weights_exp_path)
resid_exp = pd.read_parquet(resid_exp_path)
z_exp = pd.read_parquet(z_exp_path)

# Restore a DatetimeIndex where the parquet round-trip left dates in a column.
if not isinstance(weights_exp.index, pd.DatetimeIndex) and "trade_date" in weights_exp.columns:
    weights_exp.index = pd.to_datetime(weights_exp["trade_date"], errors="coerce")
if not isinstance(flags_exp.index, pd.DatetimeIndex) and "trade_date" in flags_exp.columns:
    flags_exp.index = pd.to_datetime(flags_exp["trade_date"], errors="coerce")
if not isinstance(resid_exp.index, pd.DatetimeIndex) and "trade_date" in resid_exp.columns:
    resid_exp.index = pd.to_datetime(resid_exp["trade_date"], errors="coerce")
if not isinstance(z_exp.index, pd.DatetimeIndex) and "trade_date" in z_exp.columns:
    z_exp.index = pd.to_datetime(z_exp["trade_date"], errors="coerce")

# Backtest calendar = dates common to returns, weights, and z-scores.
idx_bt = duration_scaled_return_proxy.index.intersection(weights_exp.index).intersection(z_exp.index)

return_proxy_bt = duration_scaled_return_proxy.reindex(idx_bt)
weights_bt = weights_exp.reindex(idx_bt)
resid_bt = resid_exp.reindex(idx_bt)
z_bt = z_exp.reindex(idx_bt)
flags_bt = flags_exp.reindex(idx_bt)

# Valid dates require weights, return proxies, and prior-day z-score.
weights_valid = weights_bt.notna().all(axis=1)
z_prev = z_bt.squeeze().shift(1)
return_proxy_valid = return_proxy_bt.notna().all(axis=1)

valid_mask_exp = weights_valid & return_proxy_valid & z_prev.notna()

# Coverage summary of the valid-date mask.
valid_summary = pd.DataFrame(
    [
        {
            "n_dates": int(len(idx_bt)),
            "n_valid": int(valid_mask_exp.sum()),
            "first_valid": valid_mask_exp[valid_mask_exp].index.min(),
            "last_valid": valid_mask_exp[valid_mask_exp].index.max(),
        }
    ]
)

show_table(valid_summary)
+-----------+-----------+---------------------+---------------------+
|   n_dates |   n_valid | first_valid         | last_valid          |
|-----------+-----------+---------------------+---------------------|
|      8756 |      8504 | 1992-01-09 00:00:00 | 2026-01-16 00:00:00 |
+-----------+-----------+---------------------+---------------------+
Show code
# 7.3 Implement state machine for expanding
params = backtest_spec["parameter_defaults"]
entry_z = float(params["entry_z"])
exit_z = float(params["exit_z"])
max_holding_obs = int(params["max_holding_obs"])

# squeeze() converts the single-column frame to a Series for the state machine.
z_series_exp = z_bt.squeeze()
state_exp = run_state_machine(z_series_exp, entry_z, exit_z, max_holding_obs, valid_mask_exp)

# Day counts per state as a quick sanity check.
counts_exp = pd.DataFrame(
    [
        {
            "n_long": int((state_exp["state"] == 1).sum()),
            "n_short": int((state_exp["state"] == -1).sum()),
            "n_flat": int((state_exp["state"] == 0).sum()),
        }
    ]
)

show_table(counts_exp)
+----------+-----------+----------+
|   n_long |   n_short |   n_flat |
|----------+-----------+----------|
|     1952 |      1535 |     5269 |
+----------+-----------+----------+
Show code
# 7.4 Compute position vectors, pnl_gross, turnover, cost, pnl_net (expanding)
# Position vector = PCA weights scaled by discrete state (+1/-1/0).
pos_vec_exp = weights_bt.mul(state_exp["state"], axis=0)
# Flat days are set to exactly 0.0 (also clears NaN-weight rows on those days).
pos_vec_exp = pos_vec_exp.where(state_exp["state"] != 0, 0.0)

return_proxy_adj = return_proxy_bt
pnl_gross_exp = (pos_vec_exp * return_proxy_adj).sum(axis=1, min_count=1)
pnl_gross_exp = pnl_gross_exp.where(valid_mask_exp, 0.0)

# Turnover = half-sum of absolute day-over-day position changes.
turnover_exp = 0.5 * pos_vec_exp.sub(pos_vec_exp.shift(1)).abs().sum(axis=1)
# NOTE(review): masking turnover to 0 on invalid days means a forced flattening
# on a bad-data day incurs no transaction cost — confirm this is intended.
turnover_exp = turnover_exp.where(valid_mask_exp, 0.0)

cost_per_turnover = float(params["cost_per_turnover"])
# Trading cost is linear in turnover for a simple implementation.
cost_exp = turnover_exp * cost_per_turnover
pnl_net_exp = pnl_gross_exp - cost_exp

# Daily backtest panel; "position_sign" duplicates "position_state" here.
bt_daily_exp = pd.DataFrame(
    {
        "residual": resid_bt.squeeze(),
        "zscore": z_series_exp,
        "signal_raw": flags_bt["signal_raw"],
        "position_state": state_exp["state"],
        "position_sign": state_exp["state"],
        "pnl_gross": pnl_gross_exp,
        "turnover": turnover_exp,
        "cost": cost_exp,
        "pnl_net": pnl_net_exp,
    },
    index=idx_bt,
)

bt_daily_exp.to_parquet(derived_dir / "bt_daily_expanding.parquet", engine="pyarrow", index=True)

def summarize_pnl(pnl_series: pd.Series, label: str) -> dict:
    """Return quick diagnostic stats for a PnL series.

    Keys: series (the label), mean, std (population, ddof=0), sharpe_proxy
    (annualized with sqrt(252)), min, max. An empty series yields NaN for
    every statistic; a zero or non-finite std yields a NaN Sharpe proxy.
    """
    if len(pnl_series):
        mean = float(pnl_series.mean())
        std = float(pnl_series.std(ddof=0))
        low = float(pnl_series.min())
        high = float(pnl_series.max())
    else:
        mean = std = low = high = np.nan
    # Guard: zero std is falsy, NaN/inf std fails isfinite -> NaN Sharpe.
    if std and np.isfinite(std):
        sharpe = float(mean / std * np.sqrt(252))
    else:
        sharpe = np.nan
    return {
        "series": label,
        "mean": mean,
        "std": std,
        "sharpe_proxy": sharpe,
        "min": low,
        "max": high,
    }

# Gross vs net PnL diagnostics side by side (gross row first).
summary_pnl = pd.DataFrame(
    [
        summarize_pnl(series, label)
        for label, series in (("gross", pnl_gross_exp), ("net", pnl_net_exp))
    ]
)

show_table(summary_pnl)
+----------+--------------+------------+----------------+------------+-----------+
| series   |         mean |        std |   sharpe_proxy |        min |       max |
|----------+--------------+------------+----------------+------------+-----------|
| gross    | -2.63316e-06 | 0.00123203 |     -0.0339278 | -0.0154247 | 0.0254367 |
| net      | -8.93472e-06 | 0.00123218 |     -0.115108  | -0.0154247 | 0.0254367 |
+----------+--------------+------------+----------------+------------+-----------+
Show code
# 7.4b Optional volatility targeting (expanding)
# All knobs come from the backtest spec with conservative defaults.
vol_target_enabled = bool(params.get("vol_target_enabled", False))
vol_target_ann = float(params.get("vol_target_ann", 0.05))
vol_target_window = int(params.get("vol_target_window", 63))
vol_target_cap = float(params.get("vol_target_cap", 3.0))

if vol_target_enabled:
    # Scale positions so realized vol tracks target, with a cap for stability.
    # Trailing realized vol of gross PnL, annualized with sqrt(252).
    realized_vol = pnl_gross_exp.rolling(vol_target_window, min_periods=vol_target_window).std(ddof=0)
    realized_ann = realized_vol * np.sqrt(252)
    scale = vol_target_ann / realized_ann
    # Lag one day so the scale applied at t only uses information through t-1.
    scale = scale.shift(1)
    scale = scale.clip(upper=vol_target_cap)
    # Stay flat (scale 0) wherever the backtest is invalid or vol is unavailable.
    scale = scale.where(valid_mask_exp & scale.notna(), 0.0)

    # Re-derive positions, PnL, turnover, and costs from the rescaled vectors.
    pos_vec_exp = pos_vec_exp.mul(scale, axis=0)
    pnl_gross_exp = (pos_vec_exp * return_proxy_adj).sum(axis=1, min_count=1)
    pnl_gross_exp = pnl_gross_exp.where(valid_mask_exp, 0.0)
    turnover_exp = 0.5 * pos_vec_exp.sub(pos_vec_exp.shift(1)).abs().sum(axis=1)
    turnover_exp = turnover_exp.where(valid_mask_exp, 0.0)
    cost_exp = turnover_exp * cost_per_turnover
    pnl_net_exp = pnl_gross_exp - cost_exp

    bt_daily_exp["pnl_gross"] = pnl_gross_exp
    bt_daily_exp["turnover"] = turnover_exp
    bt_daily_exp["cost"] = cost_exp
    bt_daily_exp["pnl_net"] = pnl_net_exp

    # Fix: bt_daily_expanding.parquet was written in 7.4 from the *unscaled*
    # series; re-persist so artifact consumers (e.g. the 7.10 summary, which
    # reads the parquet back) see the vol-targeted PnL. The rolling variant in
    # 7.8 already persists after its vol-target branch.
    bt_daily_exp.to_parquet(derived_dir / "bt_daily_expanding.parquet", engine="pyarrow", index=True)

    vol_summary = pd.DataFrame(
        [
            {
                "enabled": True,
                "target_ann_vol": vol_target_ann,
                "window": vol_target_window,
                "cap": vol_target_cap,
                # Infinite scales (zero realized vol) are excluded from quantiles.
                "scale_median": float(scale.replace([np.inf, -np.inf], np.nan).median()),
                "scale_p95": float(scale.replace([np.inf, -np.inf], np.nan).quantile(0.95)),
            }
        ]
    )
else:
    # Targeting disabled: report the (unused) parameter defaults with NaN stats.
    vol_summary = pd.DataFrame(
        [
            {
                "enabled": False,
                "target_ann_vol": vol_target_ann,
                "window": vol_target_window,
                "cap": vol_target_cap,
                "scale_median": np.nan,
                "scale_p95": np.nan,
            }
        ]
    )

show_table(vol_summary)

# Persist daily strategy turnover for the Section 8 capacity analysis.
section8_dir = Path("outputs/section_08")
section8_dir.mkdir(parents=True, exist_ok=True)
turnover_strategy_exp = (
    bt_daily_exp["turnover"]
    .rename("turnover_strategy")
    .rename_axis("date")
    .reset_index()
)
turnover_strategy_exp.to_csv(
    section8_dir / "turnover_strategy_daily_expanding.csv", index=False
)
+-----------+------------------+----------+-------+----------------+-------------+
| enabled   |   target_ann_vol |   window |   cap |   scale_median |   scale_p95 |
|-----------+------------------+----------+-------+----------------+-------------|
| False     |             0.05 |       63 |     3 |            nan |         nan |
+-----------+------------------+----------+-------+----------------+-------------+
Show code
# 7.5 Build trade list (expanding)
# Trade list collapses daily series into entry/exit-level diagnostics.
trade_list_exp = build_trade_list(bt_daily_exp)
trade_list_exp.to_parquet(
    derived_dir / "bt_trade_list_expanding.parquet", engine="pyarrow", index=False
)

n_trades_exp = len(trade_list_exp)
if n_trades_exp:
    # Spot-check the first and last few trades before summarizing.
    show_table(trade_list_exp.head(5))
    show_table(trade_list_exp.tail(5))
    trade_row = {
        "variant": "expanding",
        "n_trades": int(n_trades_exp),
        "avg_hold_days": float(trade_list_exp["holding_days"].mean()),
        "trade_hit_rate": float((trade_list_exp["cum_pnl_net"] > 0).mean()),
        "avg_abs_z_entry": float(trade_list_exp["entry_z"].abs().mean()),
        "p95_hold_days": float(trade_list_exp["holding_days"].quantile(0.95)),
    }
else:
    # No trades: keep the same schema with NaN statistics.
    trade_row = {
        "variant": "expanding",
        "n_trades": 0,
        "avg_hold_days": np.nan,
        "trade_hit_rate": np.nan,
        "avg_abs_z_entry": np.nan,
        "p95_hold_days": np.nan,
    }

summary_trades = pd.DataFrame([trade_row])

show_table(summary_trades)
+------------+--------+---------------------+---------------------+-----------+------------+----------------+-----------------+---------------+---------------+-----------------+
|   trade_id | side   | entry_date          | exit_date           |   entry_z |     exit_z |   holding_days |   cum_pnl_gross |   cum_pnl_net |   max_adverse |   max_favorable |
|------------+--------+---------------------+---------------------+-----------+------------+----------------+-----------------+---------------+---------------+-----------------|
|          1 | short  | 1992-01-14 00:00:00 | 1992-04-09 00:00:00 |   2.16806 |  2.28351   |             60 |     -0.00537589 |   -0.00604548 |   -0.00604548 |      0.0112429  |
|          2 | short  | 1992-04-10 00:00:00 | 1992-07-08 00:00:00 |   2.37034 |  0.996596  |             60 |      0.00327438 |    0.0027865  |   -0.00307409 |      0.00416385 |
|          3 | short  | 1993-01-04 00:00:00 | 1993-02-04 00:00:00 |   2.30511 | -0.0659652 |             22 |      0.00821286 |    0.00778124 |    0.00214457 |      0.00798958 |
|          4 | long   | 1993-06-01 00:00:00 | 1993-08-16 00:00:00 |  -2.44185 |  0.143325  |             53 |      0.0065444  |    0.0060088  |   -0.00338935 |      0.00626155 |
|          5 | long   | 1993-10-14 00:00:00 | 1994-01-11 00:00:00 |  -2.03657 | -0.898834  |             60 |      0.00191663 |    0.00129712 |   -0.00315472 |      0.00403326 |
+------------+--------+---------------------+---------------------+-----------+------------+----------------+-----------------+---------------+---------------+-----------------+
+------------+--------+---------------------+---------------------+-----------+----------+----------------+-----------------+---------------+---------------+-----------------+
|   trade_id | side   | entry_date          | exit_date           |   entry_z |   exit_z |   holding_days |   cum_pnl_gross |   cum_pnl_net |   max_adverse |   max_favorable |
|------------+--------+---------------------+---------------------+-----------+----------+----------------+-----------------+---------------+---------------+-----------------|
|         68 | long   | 2022-03-04 00:00:00 | 2022-05-31 00:00:00 |  -2.18081 | -1.78476 |             60 |     -0.00730388 |   -0.00774019 |   -0.00993163 |     0.000212298 |
|         69 | long   | 2022-06-13 00:00:00 | 2022-09-08 00:00:00 |  -2.03979 | -1.60511 |             60 |     -0.0135292  |   -0.0139624  |   -0.0173768  |    -0.00168006  |
|         70 | long   | 2022-11-07 00:00:00 | 2023-02-06 00:00:00 |  -2.00598 | -1.51535 |             60 |     -0.0063734  |   -0.00680169 |   -0.00685087 |     0.000834852 |
|         71 | short  | 2024-08-05 00:00:00 | 2024-10-30 00:00:00 |   2.68774 |  2.18546 |             60 |     -0.00617556 |   -0.00658819 |   -0.00848438 |     0.000986787 |
|         72 | short  | 2024-10-31 00:00:00 | 2025-01-30 00:00:00 |   2.15403 |  1.55081 |             60 |     -0.00231593 |   -0.00272731 |   -0.0037842  |     0.00251035  |
+------------+--------+---------------------+---------------------+-----------+----------+----------------+-----------------+---------------+---------------+-----------------+
+-----------+------------+-----------------+------------------+-------------------+-----------------+
| variant   |   n_trades |   avg_hold_days |   trade_hit_rate |   avg_abs_z_entry |   p95_hold_days |
|-----------+------------+-----------------+------------------+-------------------+-----------------|
| expanding |         72 |         48.4306 |         0.486111 |           2.27793 |              60 |
+-----------+------------+-----------------+------------------+-------------------+-----------------+
Show code
# 7.6 Exposure diagnostics vs PC1 and PC2 (expanding)
# Evaluate residual strategy exposure to PC1/PC2 via rolling corr/beta.
loadings_exp_path = derived_dir / "pca_loadings_daily_expanding.parquet"
if not loadings_exp_path.exists():
    raise FileNotFoundError(loadings_exp_path)

loadings_exp = pd.read_parquet(loadings_exp_path)
if "trade_date" in loadings_exp.columns:
    loadings_exp["trade_date"] = pd.to_datetime(loadings_exp["trade_date"], errors="coerce")
means_exp_path = derived_dir / "pca_means_daily_expanding.parquet"
if not means_exp_path.exists():
    raise FileNotFoundError(means_exp_path)
means_exp = pd.read_parquet(means_exp_path)
# Restore a DatetimeIndex when the parquet round-trip left trade_date as a column.
if not isinstance(means_exp.index, pd.DatetimeIndex) and "trade_date" in means_exp.columns:
    means_exp.index = pd.to_datetime(means_exp["trade_date"], errors="coerce")

# Long-format loadings -> wide (date x tenor) matrices for PC1 and PC2.
pc1 = loadings_exp.loc[loadings_exp["pc"] == 1].pivot(
    index="trade_date", columns="tenor", values="loading"
)
pc2 = loadings_exp.loc[loadings_exp["pc"] == 2].pivot(
    index="trade_date", columns="tenor", values="loading"
)

# Forward-fill onto the daily backtest calendar (loadings change only at refits).
pc1 = pc1.reindex(idx_bt).ffill()
pc2 = pc2.reindex(idx_bt).ffill()

# Factor returns: PCA loadings applied to mean-centered duration-scaled returns.
r_t = duration_scaled_return_proxy.reindex(idx_bt)[tenors]
mu_t = means_exp.reindex(idx_bt)[tenors]
r_centered = r_t - mu_t

f1 = (pc1[tenors] * r_centered).sum(axis=1, min_count=1)
f2 = (pc2[tenors] * r_centered).sum(axis=1, min_count=1)

# Use NaN (not 0) outside the valid mask so invalid days drop out of rolling stats.
pnl_for_diag = pnl_gross_exp.where(valid_mask_exp, np.nan)
window = 252

# Rolling correlation/beta of PnL vs each factor, lagged one day so the
# diagnostic dated t only uses data through t-1.
corr_pnl_pc1 = pnl_for_diag.rolling(window, min_periods=window).corr(f1).shift(1)
corr_pnl_pc2 = pnl_for_diag.rolling(window, min_periods=window).corr(f2).shift(1)

# Beta = rolling cov(pnl, factor) / rolling var(factor).
beta_pnl_pc1 = (
    pnl_for_diag.rolling(window, min_periods=window).cov(f1)
    / f1.rolling(window, min_periods=window).var()
).shift(1)
beta_pnl_pc2 = (
    pnl_for_diag.rolling(window, min_periods=window).cov(f2)
    / f2.rolling(window, min_periods=window).var()
).shift(1)

exposure_diag_exp = pd.DataFrame(
    {
        "date": idx_bt,
        "corr_pnl_pc1": corr_pnl_pc1.values,
        "corr_pnl_pc2": corr_pnl_pc2.values,
        "beta_pnl_pc1": beta_pnl_pc1.values,
        "beta_pnl_pc2": beta_pnl_pc2.values,
    }
)

exposure_diag_exp.to_parquet(
    derived_dir / "bt_exposure_diag_expanding.parquet", engine="pyarrow", index=False
)

# Median/p95 of |beta| summarize residual factor exposure in one small table.
beta_summary = pd.DataFrame(
    [
        {
            "metric": "pc1",
            "median_abs_beta": float(exposure_diag_exp["beta_pnl_pc1"].abs().median()),
            "p95_abs_beta": float(exposure_diag_exp["beta_pnl_pc1"].abs().quantile(0.95)),
        },
        {
            "metric": "pc2",
            "median_abs_beta": float(exposure_diag_exp["beta_pnl_pc2"].abs().median()),
            "p95_abs_beta": float(exposure_diag_exp["beta_pnl_pc2"].abs().quantile(0.95)),
        },
    ]
)

show_table(beta_summary)
+----------+-------------------+----------------+
| metric   |   median_abs_beta |   p95_abs_beta |
|----------+-------------------+----------------|
| pc1      |         0.0156356 |      0.0718023 |
| pc2      |         0.0804524 |      0.412114  |
+----------+-------------------+----------------+
Show code
# 7.7 Figures (expanding)
%matplotlib inline

section7_dir = Path("outputs/section_07")
section7_dir.mkdir(parents=True, exist_ok=True)

# Equity curves are cumulative sums of the additive return proxy (not compounded).
cum_gross = pnl_gross_exp.cumsum()
cum_net = pnl_net_exp.cumsum()

fig, ax = plt.subplots()
ax.plot(cum_gross.index, cum_gross.values, label="gross")
ax.plot(cum_net.index, cum_net.values, label="net")
ax.set_title("Expanding equity curve")
ax.set_xlabel("date")
ax.set_ylabel("cumulative return (proxy)")
ax.legend()
fig.autofmt_xdate()
fig.savefig(section7_dir / "fig_equity_curve_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

# Drawdown = distance of the net equity curve below its running peak (<= 0).
running_max = cum_net.cummax()
drawdown = cum_net - running_max

fig, ax = plt.subplots()
ax.plot(drawdown.index, drawdown.values)
ax.set_title("Expanding drawdown")
ax.set_xlabel("date")
ax.set_ylabel("drawdown (proxy)")
fig.autofmt_xdate()
fig.savefig(section7_dir / "fig_drawdown_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

# Distribution of daily net PnL.
fig, ax = plt.subplots()
ax.hist(pnl_net_exp.dropna().values, bins=50)
ax.set_title("Expanding pnl_net histogram")
ax.set_xlabel("pnl_net")
ax.set_ylabel("count")
fig.savefig(section7_dir / "fig_pnl_hist_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

# Discrete position state (+1/-1/0) over time.
fig, ax = plt.subplots()
ax.plot(state_exp.index, state_exp["state"].values)
ax.set_title("Expanding position state")
ax.set_xlabel("date")
ax.set_ylabel("state")
fig.autofmt_xdate()
fig.savefig(section7_dir / "fig_positions_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

Show code
# 7.8 Repeat for rolling mode if artifacts exist
# Rolling-mode artifacts are optional; the whole section is skipped when absent.
flags_roll_path = derived_dir / "signal_flags_rolling.parquet"
weights_roll_path = derived_dir / "pca_weights_daily_rolling.parquet"
loadings_roll_path = derived_dir / "pca_loadings_daily_rolling.parquet"

if flags_roll_path.exists() and weights_roll_path.exists() and loadings_roll_path.exists():
    # Rolling backtest mirrors expanding logic, swapping in rolling artifacts.
    flags_roll = pd.read_parquet(flags_roll_path)
    weights_roll = pd.read_parquet(weights_roll_path)
    # Restore DatetimeIndex from trade_date when the parquet round-trip lost it.
    if not isinstance(weights_roll.index, pd.DatetimeIndex) and "trade_date" in weights_roll.columns:
        weights_roll.index = pd.to_datetime(weights_roll["trade_date"], errors="coerce")
    if not isinstance(flags_roll.index, pd.DatetimeIndex) and "trade_date" in flags_roll.columns:
        flags_roll.index = pd.to_datetime(flags_roll["trade_date"], errors="coerce")

    # Backtest calendar = intersection of return-proxy, weights, and flags dates.
    idx_bt_roll = duration_scaled_return_proxy.index.intersection(weights_roll.index).intersection(flags_roll.index)
    return_proxy_bt_roll = duration_scaled_return_proxy.reindex(idx_bt_roll)
    weights_bt_roll = weights_roll.reindex(idx_bt_roll)
    z_roll = flags_roll["zscore"].reindex(idx_bt_roll)
    resid_roll = flags_roll["residual"].reindex(idx_bt_roll)

    # A day is valid only when all weights and returns are present and
    # yesterday's z-score exists (the signal is used with a one-day lag).
    weights_valid_roll = weights_bt_roll.notna().all(axis=1)
    z_prev_roll = z_roll.shift(1)
    return_proxy_valid_roll = return_proxy_bt_roll.notna().all(axis=1)

    valid_mask_roll = weights_valid_roll & z_prev_roll.notna() & return_proxy_valid_roll

    # Same state machine and thresholds as the expanding run.
    state_roll = run_state_machine(z_roll, entry_z, exit_z, max_holding_obs, valid_mask_roll)

    # Positions, gross PnL, turnover, and linear costs — same conventions as 7.4.
    pos_vec_roll = weights_bt_roll.mul(state_roll["state"], axis=0)
    pos_vec_roll = pos_vec_roll.where(state_roll["state"] != 0, 0.0)
    return_proxy_adj_roll = return_proxy_bt_roll
    pnl_gross_roll = (pos_vec_roll * return_proxy_adj_roll).sum(axis=1, min_count=1)
    pnl_gross_roll = pnl_gross_roll.where(valid_mask_roll, 0.0)
    turnover_roll = 0.5 * pos_vec_roll.sub(pos_vec_roll.shift(1)).abs().sum(axis=1)
    turnover_roll = turnover_roll.where(valid_mask_roll, 0.0)
    cost_roll = turnover_roll * cost_per_turnover
    pnl_net_roll = pnl_gross_roll - cost_roll

    if vol_target_enabled:
        # Vol targeting mirrors 7.4b: lagged, capped scale from trailing gross vol.
        realized_vol_roll = pnl_gross_roll.rolling(vol_target_window, min_periods=vol_target_window).std(ddof=0)
        realized_ann_roll = realized_vol_roll * np.sqrt(252)
        scale_roll = vol_target_ann / realized_ann_roll
        scale_roll = scale_roll.shift(1)
        scale_roll = scale_roll.clip(upper=vol_target_cap)
        scale_roll = scale_roll.where(valid_mask_roll & scale_roll.notna(), 0.0)

        pos_vec_roll = pos_vec_roll.mul(scale_roll, axis=0)
        pnl_gross_roll = (pos_vec_roll * return_proxy_adj_roll).sum(axis=1, min_count=1)
        pnl_gross_roll = pnl_gross_roll.where(valid_mask_roll, 0.0)
        turnover_roll = 0.5 * pos_vec_roll.sub(pos_vec_roll.shift(1)).abs().sum(axis=1)
        turnover_roll = turnover_roll.where(valid_mask_roll, 0.0)
        cost_roll = turnover_roll * cost_per_turnover
        pnl_net_roll = pnl_gross_roll - cost_roll

    # Daily panel assembled *after* optional vol targeting, so the persisted
    # parquet always reflects the final (scaled) series.
    bt_daily_roll = pd.DataFrame(
        {
            "residual": resid_roll,
            "zscore": z_roll,
            "signal_raw": flags_roll["signal_raw"].reindex(idx_bt_roll),
            "position_state": state_roll["state"],
            "position_sign": state_roll["state"],
            "pnl_gross": pnl_gross_roll,
            "turnover": turnover_roll,
            "cost": cost_roll,
            "pnl_net": pnl_net_roll,
        },
        index=idx_bt_roll,
    )

    bt_daily_roll.to_parquet(derived_dir / "bt_daily_rolling.parquet", engine="pyarrow", index=True)

    # Daily turnover export for the Section 8 capacity analysis.
    section8_dir = Path("outputs/section_08")
    section8_dir.mkdir(parents=True, exist_ok=True)
    turnover_strategy_roll = pd.DataFrame(
        {"date": bt_daily_roll.index, "turnover_strategy": bt_daily_roll["turnover"].values}
    )
    turnover_strategy_roll.to_csv(
        section8_dir / "turnover_strategy_daily_rolling.csv", index=False
    )

    # Trade-level diagnostics, mirroring 7.5.
    trade_list_roll = build_trade_list(bt_daily_roll)
    trade_list_roll.to_parquet(
        derived_dir / "bt_trade_list_rolling.parquet", engine="pyarrow", index=False
    )

    if len(trade_list_roll):
        trade_summary_roll = pd.DataFrame(
            [
                {
                    "variant": "rolling",
                    "n_trades": int(len(trade_list_roll)),
                    "avg_hold_days": float(trade_list_roll["holding_days"].mean()),
                    "trade_hit_rate": float((trade_list_roll["cum_pnl_net"] > 0).mean()),
                    "avg_abs_z_entry": float(trade_list_roll["entry_z"].abs().mean()),
                    "p95_hold_days": float(trade_list_roll["holding_days"].quantile(0.95)),
                }
            ]
        )
    else:
        # No trades: keep schema with NaN statistics.
        trade_summary_roll = pd.DataFrame(
            [
                {
                    "variant": "rolling",
                    "n_trades": 0,
                    "avg_hold_days": np.nan,
                    "trade_hit_rate": np.nan,
                    "avg_abs_z_entry": np.nan,
                    "p95_hold_days": np.nan,
                }
            ]
        )

    show_table(trade_summary_roll)

    # Exposure diagnostics vs PC1/PC2, mirroring 7.6.
    loadings_roll = pd.read_parquet(loadings_roll_path)
    if "trade_date" in loadings_roll.columns:
        loadings_roll["trade_date"] = pd.to_datetime(loadings_roll["trade_date"], errors="coerce")
    means_roll_path = derived_dir / "pca_means_daily_rolling.parquet"
    if not means_roll_path.exists():
        raise FileNotFoundError(means_roll_path)
    means_roll = pd.read_parquet(means_roll_path)
    if not isinstance(means_roll.index, pd.DatetimeIndex) and "trade_date" in means_roll.columns:
        means_roll.index = pd.to_datetime(means_roll["trade_date"], errors="coerce")

    pc1_roll = loadings_roll.loc[loadings_roll["pc"] == 1].pivot(
        index="trade_date", columns="tenor", values="loading"
    )
    pc2_roll = loadings_roll.loc[loadings_roll["pc"] == 2].pivot(
        index="trade_date", columns="tenor", values="loading"
    )

    # Forward-fill refit-dated loadings onto the daily backtest calendar.
    pc1_roll = pc1_roll.reindex(idx_bt_roll).ffill()
    pc2_roll = pc2_roll.reindex(idx_bt_roll).ffill()

    r_t_roll = duration_scaled_return_proxy.reindex(idx_bt_roll)[tenors]
    mu_roll = means_roll.reindex(idx_bt_roll)[tenors]
    r_centered_roll = r_t_roll - mu_roll

    f1_roll = (pc1_roll[tenors] * r_centered_roll).sum(axis=1, min_count=1)
    f2_roll = (pc2_roll[tenors] * r_centered_roll).sum(axis=1, min_count=1)

    pnl_diag_roll = pnl_gross_roll.where(valid_mask_roll, np.nan)

    # Reuse the 252-obs window defined in 7.6.
    window = int(window)

    corr_pnl_pc1_roll = pnl_diag_roll.rolling(window, min_periods=window).corr(f1_roll).shift(1)
    corr_pnl_pc2_roll = pnl_diag_roll.rolling(window, min_periods=window).corr(f2_roll).shift(1)
    beta_pnl_pc1_roll = (
        pnl_diag_roll.rolling(window, min_periods=window).cov(f1_roll)
        / f1_roll.rolling(window, min_periods=window).var()
    ).shift(1)
    beta_pnl_pc2_roll = (
        pnl_diag_roll.rolling(window, min_periods=window).cov(f2_roll)
        / f2_roll.rolling(window, min_periods=window).var()
    ).shift(1)

    exposure_diag_roll = pd.DataFrame(
        {
            "date": idx_bt_roll,
            "corr_pnl_pc1": corr_pnl_pc1_roll.values,
            "corr_pnl_pc2": corr_pnl_pc2_roll.values,
            "beta_pnl_pc1": beta_pnl_pc1_roll.values,
            "beta_pnl_pc2": beta_pnl_pc2_roll.values,
        }
    )

    exposure_diag_roll.to_parquet(
        derived_dir / "bt_exposure_diag_rolling.parquet", engine="pyarrow", index=False
    )

    beta_summary_roll = pd.DataFrame(
        [
            {
                "metric": "pc1",
                "median_abs_beta": float(exposure_diag_roll["beta_pnl_pc1"].abs().median()),
                "p95_abs_beta": float(exposure_diag_roll["beta_pnl_pc1"].abs().quantile(0.95)),
            },
            {
                "metric": "pc2",
                "median_abs_beta": float(exposure_diag_roll["beta_pnl_pc2"].abs().median()),
                "p95_abs_beta": float(exposure_diag_roll["beta_pnl_pc2"].abs().quantile(0.95)),
            },
        ]
    )

    show_table(beta_summary_roll)

    # Figures, mirroring 7.7.
    cum_gross_roll = pnl_gross_roll.cumsum()
    cum_net_roll = pnl_net_roll.cumsum()

    fig, ax = plt.subplots()
    ax.plot(cum_gross_roll.index, cum_gross_roll.values, label="gross")
    ax.plot(cum_net_roll.index, cum_net_roll.values, label="net")
    ax.set_title("Rolling equity curve")
    ax.set_xlabel("date")
    ax.set_ylabel("cumulative return (proxy)")
    ax.legend()
    fig.autofmt_xdate()
    fig.savefig(section7_dir / "fig_equity_curve_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()

    running_max_roll = cum_net_roll.cummax()
    drawdown_roll = cum_net_roll - running_max_roll

    fig, ax = plt.subplots()
    ax.plot(drawdown_roll.index, drawdown_roll.values)
    ax.set_title("Rolling drawdown")
    ax.set_xlabel("date")
    ax.set_ylabel("drawdown (proxy)")
    fig.autofmt_xdate()
    fig.savefig(section7_dir / "fig_drawdown_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()

    fig, ax = plt.subplots()
    ax.hist(pnl_net_roll.dropna().values, bins=50)
    ax.set_title("Rolling pnl_net histogram")
    ax.set_xlabel("pnl_net")
    ax.set_ylabel("count")
    fig.savefig(section7_dir / "fig_pnl_hist_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()

    fig, ax = plt.subplots()
    ax.plot(state_roll.index, state_roll["state"].values)
    ax.set_title("Rolling position state")
    ax.set_xlabel("date")
    ax.set_ylabel("state")
    fig.autofmt_xdate()
    fig.savefig(section7_dir / "fig_positions_rolling.png", dpi=150, bbox_inches="tight")
    plt.show()
else:
    # Artifacts missing: record the skip so the notebook output is explicit.
    show_table(pd.DataFrame([{"rolling_backtest": "skipped", "reason": "missing artifacts"}]))
+-----------+------------+-----------------+------------------+-------------------+-----------------+
| variant   |   n_trades |   avg_hold_days |   trade_hit_rate |   avg_abs_z_entry |   p95_hold_days |
|-----------+------------+-----------------+------------------+-------------------+-----------------|
| rolling   |         68 |         48.4853 |         0.411765 |           2.28344 |              60 |
+-----------+------------+-----------------+------------------+-------------------+-----------------+
+----------+-------------------+----------------+
| metric   |   median_abs_beta |   p95_abs_beta |
|----------+-------------------+----------------|
| pc1      |         0.0198853 |      0.0588042 |
| pc2      |         0.0979981 |      0.455036  |
+----------+-------------------+----------------+

Show code
# 7.9 Causality assertions and spot checks
# Confirm PnL uses prior-day z and weights aligned to the most recent refit <= t-1.
rows_causality = []

weights_refit_exp_path = derived_dir / "pca_weights_refit_expanding.parquet"
if weights_refit_exp_path.exists():
    w_refit_exp = pd.read_parquet(weights_refit_exp_path)
    # Long refit table -> wide (refit_date x tenor) weight matrix.
    w_refit_exp_wide = w_refit_exp.pivot(index="refit_date", columns="tenor", values="weight").sort_index()
    w_refit_exp_wide = w_refit_exp_wide.reindex(columns=tenors).fillna(0.0)
    # apply_causality_shift (defined elsewhere in this file) returns, per trade
    # date, the causally-shifted weights and the refit date actually used.
    w_exp_expected, refit_exp_used = apply_causality_shift(
        w_refit_exp_wide, duration_scaled_return_proxy.index, "expanding_weights_7_9"
    )
else:
    w_exp_expected = None
    refit_exp_used = None

# Spot-check only the first ten backtest dates that carry a z-score.
dates_check = [d for d in idx_bt[:10] if d in z_series_exp.index]
for t in dates_check:
    pos = idx_bt.get_loc(t)
    if pos == 0:
        continue
    prev_date = idx_bt[pos - 1]
    # shift(1).loc[t] is the z value the backtest would have used at t (z at t-1).
    z_used = z_series_exp.shift(1).loc[t]
    # NOTE(review): exact float equality is intentional here — it only holds if
    # daily weights are copied verbatim from refit weights, not recomputed.
    weights_match = (
        w_exp_expected is not None
        and t in w_exp_expected.index
        and (weights_bt.loc[t, tenors].fillna(0.0).values == w_exp_expected.loc[t, tenors].fillna(0.0).values).all()
    )
    refit_used = refit_exp_used.loc[t] if refit_exp_used is not None and t in refit_exp_used.index else pd.NaT
    rows_causality.append(
        {
            "mode": "expanding",
            "pnl_date": t,
            "prev_trade_date": prev_date,
            "refit_date_used": refit_used,
            # Causality holds when the refit used is on/before the prior trade date.
            "refit_date_leq_prev_trade": bool(pd.notna(refit_used) and refit_used <= prev_date),
            "z_matches_prev": bool(pd.isna(z_used) or z_used == z_series_exp.loc[prev_date]),
            "weights_match_expected": bool(weights_match),
        }
    )

# Same spot checks for the rolling variant when all rolling artifacts exist.
weights_refit_roll_path = derived_dir / "pca_weights_refit_rolling.parquet"
if flags_roll_path.exists() and weights_roll_path.exists() and weights_refit_roll_path.exists():
    flags_roll_check = pd.read_parquet(flags_roll_path)
    weights_roll_check = pd.read_parquet(weights_roll_path)
    if not isinstance(weights_roll_check.index, pd.DatetimeIndex) and "trade_date" in weights_roll_check.columns:
        weights_roll_check.index = pd.to_datetime(weights_roll_check["trade_date"], errors="coerce")
    if not isinstance(flags_roll_check.index, pd.DatetimeIndex) and "trade_date" in flags_roll_check.columns:
        flags_roll_check.index = pd.to_datetime(flags_roll_check["trade_date"], errors="coerce")
    z_roll_check = flags_roll_check["zscore"]
    idx_bt_roll = duration_scaled_return_proxy.index.intersection(weights_roll_check.index).intersection(z_roll_check.index)
    w_refit_roll = pd.read_parquet(weights_refit_roll_path)
    w_refit_roll_wide = w_refit_roll.pivot(index="refit_date", columns="tenor", values="weight").sort_index()
    w_refit_roll_wide = w_refit_roll_wide.reindex(columns=tenors).fillna(0.0)
    w_roll_expected, refit_roll_used = apply_causality_shift(
        w_refit_roll_wide, duration_scaled_return_proxy.index, "rolling_weights_7_9"
    )
    for t in idx_bt_roll[:10]:
        pos = idx_bt_roll.get_loc(t)
        if pos == 0:
            continue
        prev_date = idx_bt_roll[pos - 1]
        z_used = z_roll_check.shift(1).loc[t]
        weights_match = (
            t in w_roll_expected.index
            and (weights_roll_check.loc[t, tenors].fillna(0.0).values == w_roll_expected.loc[t, tenors].fillna(0.0).values).all()
        )
        refit_used = refit_roll_used.loc[t] if t in refit_roll_used.index else pd.NaT
        rows_causality.append(
            {
                "mode": "rolling",
                "pnl_date": t,
                "prev_trade_date": prev_date,
                "refit_date_used": refit_used,
                "refit_date_leq_prev_trade": bool(pd.notna(refit_used) and refit_used <= prev_date),
                "z_matches_prev": bool(pd.isna(z_used) or z_used == z_roll_check.loc[prev_date]),
                "weights_match_expected": bool(weights_match),
            }
        )

show_table(pd.DataFrame(rows_causality))
+-----------+---------------------+---------------------+---------------------+-----------------------------+------------------+--------------------------+
| mode      | pnl_date            | prev_trade_date     | refit_date_used     | refit_date_leq_prev_trade   | z_matches_prev   | weights_match_expected   |
|-----------+---------------------+---------------------+---------------------+-----------------------------+------------------+--------------------------|
| expanding | 1991-01-08 00:00:00 | 1991-01-07 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-09 00:00:00 | 1991-01-08 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-10 00:00:00 | 1991-01-09 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-11 00:00:00 | 1991-01-10 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-14 00:00:00 | 1991-01-11 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-15 00:00:00 | 1991-01-14 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-16 00:00:00 | 1991-01-15 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-17 00:00:00 | 1991-01-16 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| expanding | 1991-01-18 00:00:00 | 1991-01-17 00:00:00 | 1991-01-04 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-13 00:00:00 | 1993-01-12 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-14 00:00:00 | 1993-01-13 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-15 00:00:00 | 1993-01-14 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-19 00:00:00 | 1993-01-15 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-20 00:00:00 | 1993-01-19 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-21 00:00:00 | 1993-01-20 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-22 00:00:00 | 1993-01-21 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-25 00:00:00 | 1993-01-22 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
| rolling   | 1993-01-26 00:00:00 | 1993-01-25 00:00:00 | 1993-01-11 00:00:00 | True                        | True             | True                     |
+-----------+---------------------+---------------------+---------------------+-----------------------------+------------------+--------------------------+
Show code
# 7.10 Summary metrics from saved artifacts
# Consolidate key metrics across modes for quick comparison.
# Reads the *persisted* parquet files rather than in-memory variables, so this
# cell also validates that the artifacts on disk are complete.
summary_rows = []
for mode in ["expanding", "rolling"]:
    bt_path = derived_dir / f"bt_daily_{mode}.parquet"
    trade_path = derived_dir / f"bt_trade_list_{mode}.parquet"
    expo_path = derived_dir / f"bt_exposure_diag_{mode}.parquet"
    # Skip a mode silently when any of its three artifacts is missing.
    if not bt_path.exists() or not trade_path.exists() or not expo_path.exists():
        continue
    bt_df = pd.read_parquet(bt_path)
    trades_df = pd.read_parquet(trade_path)
    expo_df = pd.read_parquet(expo_path)
    gross = bt_df["pnl_gross"].dropna()
    net = bt_df["pnl_net"].dropna()
    gross_std = float(gross.std(ddof=0)) if len(gross) else np.nan
    net_std = float(net.std(ddof=0)) if len(net) else np.nan
    # Truthiness guard: std of exactly 0.0 yields NaN Sharpe; a NaN std is
    # truthy but propagates to a NaN Sharpe through the division.
    gross_sharpe = float(gross.mean() / gross_std * np.sqrt(252)) if gross_std else np.nan
    net_sharpe = float(net.mean() / net_std * np.sqrt(252)) if net_std else np.nan
    summary_rows.append(
        {
            "mode": mode,
            "n_trades": int(len(trades_df)),
            "avg_holding": float(trades_df["holding_days"].mean()) if len(trades_df) else np.nan,
            # Hit rate = fraction of trades with positive cumulative net PnL.
            "hit_rate": float((trades_df["cum_pnl_net"] > 0).mean()) if len(trades_df) else np.nan,
            "sharpe_gross": gross_sharpe,
            "sharpe_net": net_sharpe,
            "median_abs_beta_pc1": float(expo_df["beta_pnl_pc1"].abs().median()),
            "median_abs_beta_pc2": float(expo_df["beta_pnl_pc2"].abs().median()),
        }
    )

summary_table = pd.DataFrame(summary_rows)
show_table(summary_table)
+-----------+------------+---------------+------------+----------------+--------------+-----------------------+-----------------------+
| mode      |   n_trades |   avg_holding |   hit_rate |   sharpe_gross |   sharpe_net |   median_abs_beta_pc1 |   median_abs_beta_pc2 |
|-----------+------------+---------------+------------+----------------+--------------+-----------------------+-----------------------|
| expanding |         72 |       48.4306 |   0.486111 |     -0.0339278 |    -0.115108 |             0.0156356 |             0.0804524 |
| rolling   |         68 |       48.4853 |   0.411765 |     -0.0291899 |    -0.11629  |             0.0198853 |             0.0979981 |
+-----------+------------+---------------+------------+----------------+--------------+-----------------------+-----------------------+

8. Performance and robustness

Show code
# 8.1 Load strategy artifacts
# Load the backtest spec, canonical curve, durations, and raw curve needed
# by the Section 8 diagnostics; fail fast when a required artifact is absent.

derived_dir = Path("data/derived")
section8_dir = Path("outputs/section_08")
section8_dir.mkdir(parents=True, exist_ok=True)

spec_path = derived_dir / "backtest_spec.json"
if not spec_path.exists():
    raise FileNotFoundError(spec_path)

with open(spec_path, "r") as f:
    backtest_spec = json.load(f)

curve_path = derived_dir / "curve_treasury_par_canonical.parquet"
if not curve_path.exists():
    raise FileNotFoundError(curve_path)

df_curve = pd.read_parquet(curve_path)
# Normalize to a sorted, tz-naive DatetimeIndex regardless of how the parquet stored dates.
if "date" in df_curve.columns:
    df_curve["date"] = pd.to_datetime(df_curve["date"], errors="coerce")
    df_curve = df_curve.set_index("date")
df_curve.index = ensure_naive_dates(df_curve.index)
df_curve = df_curve.sort_index()

# Reuse df_raw when an earlier section already loaded it; otherwise find a
# raw treasury par parquet anywhere under data/ (first match wins).
try:
    df_raw
    _ = df_raw
except NameError:
    candidates = list(Path("data").rglob("*treasury*par*.parquet"))
    if not candidates:
        raise FileNotFoundError("raw treasury par curve parquet not found")
    df_raw = pd.read_parquet(candidates[0])
    if "date" in df_raw.columns:
        df_raw["date"] = pd.to_datetime(df_raw["date"], errors="coerce")
        df_raw = df_raw.set_index("date")
    df_raw.index = ensure_naive_dates(df_raw.index)
    df_raw = df_raw.sort_index()

# Restrict the yield panel to dates present in both canonical and raw curves.
tenors = list(backtest_spec["tenors"])
idx_overlap = df_curve.index.intersection(df_raw.index)
df_yields = df_curve.loc[idx_overlap, tenors].copy()

params = backtest_spec["parameter_defaults"]
# butterfly_legs may be stored as a JSON-encoded string or as a list.
butterfly_legs_raw = params.get("butterfly_legs", '["2_yr","5_yr","10_yr"]')
if isinstance(butterfly_legs_raw, str):
    butterfly_legs = json.loads(butterfly_legs_raw)
else:
    butterfly_legs = list(butterfly_legs_raw)
if len(butterfly_legs) != 3 or len(set(butterfly_legs)) != 3:
    raise ValueError(f"butterfly_legs must contain exactly 3 distinct tenors, got {butterfly_legs!r}")
unknown_legs = [t for t in butterfly_legs if t not in tenors]
if unknown_legs:
    raise ValueError(f"butterfly_legs contains tenors not in tenors list: {unknown_legs!r}. tenors={tenors!r}")

durations_path = Path(backtest_spec.get("durations_table_path", derived_dir / "duration_assumptions.parquet"))
if not durations_path.exists():
    raise FileNotFoundError(durations_path)

durations = pd.read_parquet(durations_path)
duration_panel_path = derived_dir / "duration_panel.parquet"
if not duration_panel_path.exists():
    raise FileNotFoundError(duration_panel_path)
duration_panel = pd.read_parquet(duration_panel_path)
duration_panel.index = ensure_naive_dates(duration_panel.index)
duration_panel = duration_panel.reindex(columns=tenors)

# build_dv01_returns is defined in an earlier section; only the
# duration-scaled return proxy (second output) is used below.
_, duration_scaled_return_proxy = build_dv01_returns(df_yields, duration_panel)

# Helper loads bt + weights + PC factor returns for a given mode.
def load_bt_mode(mode: str) -> dict:
    """Load backtest artifacts for one PCA mode and rebuild PC factor returns.

    Parameters
    ----------
    mode : "expanding" or "rolling" — selects the artifact file suffix.

    Returns a dict with keys ``bt`` (daily backtest frame), ``weights``
    (daily tenor weights) and ``f1``/``f2``/``f3`` (daily PC factor
    returns), or an empty dict when any required parquet is missing.
    """
    paths = {
        "bt": derived_dir / f"bt_daily_{mode}.parquet",
        "weights": derived_dir / f"pca_weights_daily_{mode}.parquet",
        "loadings": derived_dir / f"pca_loadings_daily_{mode}.parquet",
        "means": derived_dir / f"pca_means_daily_{mode}.parquet",
    }
    if not all(p.exists() for p in paths.values()):
        return {}

    bt = pd.read_parquet(paths["bt"])
    weights = pd.read_parquet(paths["weights"])
    loadings = pd.read_parquet(paths["loadings"])
    means_daily = pd.read_parquet(paths["means"])

    # Normalize to a DatetimeIndex when the artifact stores trade_date as a column.
    if "trade_date" in weights.columns and not isinstance(weights.index, pd.DatetimeIndex):
        weights.index = pd.to_datetime(weights["trade_date"], errors="coerce")
    if "trade_date" in loadings.columns:
        loadings["trade_date"] = pd.to_datetime(loadings["trade_date"], errors="coerce")
    if "trade_date" in means_daily.columns and not isinstance(means_daily.index, pd.DatetimeIndex):
        means_daily.index = pd.to_datetime(means_daily["trade_date"], errors="coerce")
    if "trade_date" in bt.columns and not isinstance(bt.index, pd.DatetimeIndex):
        bt.index = pd.to_datetime(bt["trade_date"], errors="coerce")

    # One (date x tenor) loading panel per principal component.
    pc_panels = {
        k: loadings.loc[loadings["pc"] == k].pivot(
            index="trade_date", columns="tenor", values="loading"
        )
        for k in (1, 2, 3)
    }

    # Keep only dates where every input is available, sorted ascending.
    idx_bt = bt.index.intersection(duration_scaled_return_proxy.index)
    for panel in pc_panels.values():
        idx_bt = idx_bt.intersection(panel.index)
    idx_bt = idx_bt.intersection(means_daily.index).sort_values()

    # Forward-fill loadings between refit dates.
    pc_panels = {k: panel.reindex(idx_bt).ffill() for k, panel in pc_panels.items()}

    r_t = duration_scaled_return_proxy.reindex(idx_bt)
    mu_t = means_daily.reindex(idx_bt)[tenors]
    r_centered = r_t[tenors] - mu_t

    # Factor return = loading-weighted sum of de-meaned tenor returns;
    # min_count=1 keeps all-NaN rows as NaN rather than 0.
    factors = {
        k: (panel[tenors] * r_centered).sum(axis=1, min_count=1)
        for k, panel in pc_panels.items()
    }

    return {
        "bt": bt,
        "weights": weights,
        "f1": factors[1],
        "f2": factors[2],
        "f3": factors[3],
    }


# Load both estimation modes; Section 8 requires both to proceed.
bt_exp = load_bt_mode("expanding")
bt_roll = load_bt_mode("rolling")

if not bt_exp or not bt_roll:
    raise FileNotFoundError("Missing expanding or rolling artifacts for Section 8")

# Gross PnL is mandatory; net PnL and turnover are optional columns.
pnl_gross_exp = bt_exp["bt"]["pnl_gross"].dropna()
pnl_net_exp = bt_exp["bt"]["pnl_net"].dropna() if "pnl_net" in bt_exp["bt"].columns else None
turnover_exp = bt_exp["bt"].get("turnover")

pnl_gross_roll = bt_roll["bt"]["pnl_gross"].dropna()
pnl_net_roll = bt_roll["bt"]["pnl_net"].dropna() if "pnl_net" in bt_roll["bt"].columns else None
turnover_roll = bt_roll["bt"].get("turnover")

# Dates covered by both modes (and both turnover series when available).
idx_common = pnl_gross_exp.index.intersection(pnl_gross_roll.index)
if turnover_exp is not None and turnover_roll is not None:
    idx_common = idx_common.intersection(turnover_exp.index).intersection(turnover_roll.index)

def compute_summary(pnl: pd.Series, turnover: pd.Series | None) -> dict:
    if pnl is None or len(pnl) == 0:
        return {
            "start": None,
            "end": None,
            "n_days": 0,
            "ann_ret": np.nan,
            "ann_vol": np.nan,
            "sharpe": np.nan,
            "hit_rate": np.nan,
            "max_drawdown": np.nan,
            "avg_turnover": np.nan,
            "var_95": np.nan,
        }
    mean = float(pnl.mean())
    std = float(pnl.std(ddof=0))
    ann_ret = mean * 252
    ann_vol = std * np.sqrt(252)
    sharpe = float(ann_ret / ann_vol) if ann_vol and np.isfinite(ann_vol) else np.nan
    hit_rate = float((pnl > 0).mean())
    cum = pnl.cumsum()
    drawdown = cum - cum.cummax()
    max_dd = float(drawdown.min()) if len(drawdown) else np.nan
    avg_turn = float(turnover.mean()) if turnover is not None and len(turnover) else np.nan
    var_95 = float(pnl.quantile(0.05))
    return {
        "start": pnl.index.min(),
        "end": pnl.index.max(),
        "n_days": int(len(pnl)),
        "ann_ret": ann_ret,
        "ann_vol": ann_vol,
        "sharpe": sharpe,
        "hit_rate": hit_rate,
        "max_drawdown": max_dd,
        "avg_turnover": avg_turn,
        "var_95": var_95,
    }


def compute_drawdown_episodes(pnl: pd.Series) -> pd.DataFrame:
    """Identify drawdown episodes in cumulative PnL and return the 10 deepest.

    An episode opens the first day cumulative PnL falls below its running
    peak and closes when the peak is regained (``recovery_date`` is None
    for an episode still open at sample end). ``depth`` is the negative
    gap between the trough and the preceding peak.
    """
    episode_cols = [
        "start_date",
        "trough_date",
        "recovery_date",
        "depth",
        "days_to_trough",
        "days_to_recover",
    ]
    if pnl is None or len(pnl) == 0:
        return pd.DataFrame(columns=episode_cols)

    equity = pnl.cumsum()
    episodes = []
    high_water = -np.inf
    high_date = None
    underwater = False
    open_start = None
    open_trough_date = None
    open_trough_val = None

    def _episode_row(recovery):
        # Snapshot the currently-open drawdown into one record.
        return {
            "start_date": open_start,
            "trough_date": open_trough_date,
            "recovery_date": recovery,
            "depth": float(open_trough_val - high_water),
            "days_to_trough": (open_trough_date - open_start).days
            if open_start is not None and open_trough_date is not None
            else None,
            "days_to_recover": (recovery - open_start).days
            if recovery is not None and open_start is not None
            else None,
        }

    for dt, val in equity.items():
        if val >= high_water:
            # New high-water mark: close any open episode at this date
            # (depth uses the pre-update peak).
            if underwater:
                episodes.append(_episode_row(dt))
                underwater = False
            high_water = val
            high_date = dt
        elif not underwater:
            # First day below the peak opens a new episode.
            open_start = high_date
            open_trough_date = dt
            open_trough_val = val
            underwater = True
        elif val < open_trough_val:
            # Still underwater and deeper: move the trough.
            open_trough_val = val
            open_trough_date = dt

    if underwater:
        # Sample ends mid-drawdown: record it with no recovery date.
        episodes.append(_episode_row(None))

    result = pd.DataFrame(episodes)
    if len(result):
        # depth is negative, so ascending sort puts the deepest episodes first.
        result = result.sort_values("depth").head(10).reset_index(drop=True)
    return result
Show code
# 8.2 Summary metrics table
# Produce high-level performance stats for each variant/series.
rows_summary = []
rows_summary.append({"variant": "expanding", "series": "gross", **compute_summary(pnl_gross_exp, turnover_exp)})
# Net rows are emitted only when a non-empty net PnL series exists.
if pnl_net_exp is not None and len(pnl_net_exp):
    rows_summary.append({"variant": "expanding", "series": "net", **compute_summary(pnl_net_exp, turnover_exp)})

rows_summary.append({"variant": "rolling", "series": "gross", **compute_summary(pnl_gross_roll, turnover_roll)})
if pnl_net_roll is not None and len(pnl_net_roll):
    rows_summary.append({"variant": "rolling", "series": "net", **compute_summary(pnl_net_roll, turnover_roll)})

# Persist both CSV and markdown renderings, then print the table inline.
summary_metrics = pd.DataFrame(rows_summary)
write_df_csv_and_md(
    summary_metrics,
    section8_dir / "summary_metrics.csv",
    section8_dir / "summary_metrics.md",
)
show_table(summary_metrics)
+-----------+----------+---------------------+---------------------+----------+--------------+-----------+------------+------------+----------------+----------------+-------------+
| variant   | series   | start               | end                 |   n_days |      ann_ret |   ann_vol |     sharpe |   hit_rate |   max_drawdown |   avg_turnover |      var_95 |
|-----------+----------+---------------------+---------------------+----------+--------------+-----------+------------+------------+----------------+----------------+-------------|
| expanding | gross    | 1991-01-07 00:00:00 | 2026-01-16 00:00:00 |     8756 | -0.000663557 | 0.0195579 | -0.0339278 |   0.192325 |      -0.144349 |      0.0630155 | -0.00165656 |
| expanding | net      | 1991-01-07 00:00:00 | 2026-01-16 00:00:00 |     8756 | -0.00225155  | 0.0195602 | -0.115108  |   0.191754 |      -0.173012 |      0.0630155 | -0.00166644 |
| rolling   | gross    | 1993-01-12 00:00:00 | 2026-01-16 00:00:00 |     8252 | -0.000618649 | 0.0211939 | -0.0291899 |   0.19365  |      -0.188303 |      0.0732331 | -0.00162005 |
| rolling   | net      | 1993-01-12 00:00:00 | 2026-01-16 00:00:00 |     8252 | -0.00246412  | 0.0211895 | -0.11629   |   0.192559 |      -0.212617 |      0.0732331 | -0.00162888 |
+-----------+----------+---------------------+---------------------+----------+--------------+-----------+------------+------------+----------------+----------------+-------------+
Show code
# 8.2b Trade stats (trade-level)
def summarize_trade_stats(trades_df: pd.DataFrame, variant: str) -> dict:
    """Trade-level summary: count, hit rate, holding stats, entry |z|.

    Returns NaN placeholders (with n_trades=0) when the trade list is
    None or empty.
    """
    if trades_df is None or len(trades_df) == 0:
        return {
            "variant": variant,
            "n_trades": 0,
            "trade_hit_rate": np.nan,
            "avg_hold_days": np.nan,
            "avg_abs_z_entry": np.nan,
            "p95_hold_days": np.nan,
        }
    holding = trades_df["holding_days"]
    return {
        "variant": variant,
        "n_trades": int(len(trades_df)),
        # A trade "hits" when its cumulative net PnL ends positive.
        "trade_hit_rate": float((trades_df["cum_pnl_net"] > 0).mean()),
        "avg_hold_days": float(holding.mean()),
        "avg_abs_z_entry": float(trades_df["entry_z"].abs().mean()),
        "p95_hold_days": float(holding.quantile(0.95)),
    }

trade_stats_rows = []
# Reuse trade lists left in scope by earlier sections; otherwise load from disk.
# (At notebook top level, locals() is the module namespace.)
if "trade_list_exp" not in locals():
    trade_path_exp = derived_dir / "bt_trade_list_expanding.parquet"
    if trade_path_exp.exists():
        trade_list_exp = pd.read_parquet(trade_path_exp)
if "trade_list_exp" in locals():
    trade_stats_rows.append(summarize_trade_stats(trade_list_exp, "expanding"))
if "trade_list_roll" not in locals():
    trade_path_roll = derived_dir / "bt_trade_list_rolling.parquet"
    if trade_path_roll.exists():
        trade_list_roll = pd.read_parquet(trade_path_roll)
if "trade_list_roll" in locals():
    trade_stats_rows.append(summarize_trade_stats(trade_list_roll, "rolling"))

trade_stats = pd.DataFrame(trade_stats_rows)
write_df_csv_and_md(
    trade_stats,
    section8_dir / "trade_stats.csv",
    section8_dir / "trade_stats.md",
)
show_table(trade_stats)
+-----------+------------+------------------+-----------------+-------------------+-----------------+
| variant   |   n_trades |   trade_hit_rate |   avg_hold_days |   avg_abs_z_entry |   p95_hold_days |
|-----------+------------+------------------+-----------------+-------------------+-----------------|
| expanding |         72 |         0.486111 |         48.4306 |           2.27793 |              60 |
| rolling   |         68 |         0.411765 |         48.4853 |           2.28344 |              60 |
+-----------+------------+------------------+-----------------+-------------------+-----------------+
Show code
# 8.3 Drawdown table and plots
# Largest drawdowns from cumulative PnL for reporting.
drawdown_rows = []
for variant, series_name, pnl in [
    ("expanding", "gross", pnl_gross_exp),
    ("expanding", "net", pnl_net_exp),
    ("rolling", "gross", pnl_gross_roll),
    ("rolling", "net", pnl_net_roll),
]:
    # Net series may be None when the backtest had no pnl_net column.
    if pnl is None or len(pnl) == 0:
        continue
    dd = compute_drawdown_episodes(pnl)
    if len(dd) == 0:
        continue
    dd["variant"] = variant
    dd["series"] = series_name
    drawdown_rows.append(dd)

drawdown_episodes = pd.concat(drawdown_rows, ignore_index=True) if drawdown_rows else pd.DataFrame()
write_df_csv_and_md(
    drawdown_episodes,
    section8_dir / "drawdown_episodes.csv",
    section8_dir / "drawdown_episodes.md",
)

# Underwater (drawdown) curve from cumulative gross PnL, per mode.
cum_exp = pnl_gross_exp.cumsum()
dd_series_exp = cum_exp - cum_exp.cummax()
fig, ax = plt.subplots()
ax.plot(dd_series_exp.index, dd_series_exp.values)
ax.set_title("Expanding drawdown")
ax.set_xlabel("date")
ax.set_ylabel("drawdown (proxy)")
fig.autofmt_xdate()
fig.savefig(section8_dir / "drawdown_expanding.png", dpi=150, bbox_inches="tight")
plt.show()

cum_roll = pnl_gross_roll.cumsum()
dd_series_roll = cum_roll - cum_roll.cummax()
fig, ax = plt.subplots()
ax.plot(dd_series_roll.index, dd_series_roll.values)
ax.set_title("Rolling drawdown")
ax.set_xlabel("date")
ax.set_ylabel("drawdown (proxy)")
fig.autofmt_xdate()
fig.savefig(section8_dir / "drawdown_rolling.png", dpi=150, bbox_inches="tight")
plt.show()

Show code
# 8.4 Performance by era
# Split sample into macro eras to check regime dependence.
# Wide sentinel bounds (1900/2100) make the first and last eras open-ended.
eras = [
    ("pre_2008", pd.Timestamp("1900-01-01"), pd.Timestamp("2007-12-31")),
    ("post_2008", pd.Timestamp("2008-01-01"), pd.Timestamp("2019-12-31")),
    ("post_2020", pd.Timestamp("2020-01-01"), pd.Timestamp("2100-01-01")),
]

rows_era = []
for mode, series_name, pnl, turnover in [
    ("expanding", "gross", pnl_gross_exp, turnover_exp),
    ("expanding", "net", pnl_net_exp, turnover_exp),
    ("rolling", "gross", pnl_gross_roll, turnover_roll),
    ("rolling", "net", pnl_net_roll, turnover_roll),
]:
    if pnl is None or len(pnl) == 0:
        continue
    for era_name, start, end in eras:
        # Inclusive date-range slice per era; turnover aligned to the same days.
        pnl_era = pnl.loc[(pnl.index >= start) & (pnl.index <= end)]
        if turnover is not None:
            turnover_era = turnover.reindex(pnl_era.index)
        else:
            turnover_era = None
        metrics = compute_summary(pnl_era, turnover_era)
        rows_era.append({"variant": mode, "series": series_name, "era": era_name, **metrics})

perf_by_era = pd.DataFrame(rows_era)
write_df_csv_and_md(
    perf_by_era,
    section8_dir / "performance_by_era.csv",
    section8_dir / "performance_by_era.md",
)
show_table(perf_by_era)
+-----------+----------+-----------+---------------------+---------------------+----------+--------------+------------+-------------+------------+----------------+----------------+--------------+
| variant   | series   | era       | start               | end                 |   n_days |      ann_ret |    ann_vol |      sharpe |   hit_rate |   max_drawdown |   avg_turnover |       var_95 |
|-----------+----------+-----------+---------------------+---------------------+----------+--------------+------------+-------------+------------+----------------+----------------+--------------|
| expanding | gross    | pre_2008  | 1991-01-07 00:00:00 | 2007-12-31 00:00:00 |     4250 |  0.00251947  | 0.0240243  |  0.104872   |   0.202353 |     -0.0967924 |      0.0938766 | -0.00201202  |
| expanding | gross    | post_2008 | 2008-01-02 00:00:00 | 2019-12-31 00:00:00 |     2995 | -0.00174402  | 0.0155912  | -0.111859   |   0.196661 |     -0.0536979 |      0.037964  | -0.00146552  |
| expanding | gross    | post_2020 | 2020-01-02 00:00:00 | 2026-01-16 00:00:00 |     1511 | -0.00747486  | 0.0105386  | -0.709287   |   0.155526 |     -0.0469744 |      0.0258679 | -0.00111695  |
| expanding | net      | pre_2008  | 1991-01-07 00:00:00 | 2007-12-31 00:00:00 |     4250 |  0.000153782 | 0.0240316  |  0.00639915 |   0.201412 |     -0.100461  |      0.0938766 | -0.00204287  |
| expanding | net      | post_2008 | 2008-01-02 00:00:00 | 2019-12-31 00:00:00 |     2995 | -0.00270071  | 0.0155749  | -0.173402   |   0.196327 |     -0.0549673 |      0.037964  | -0.00146552  |
| expanding | net      | post_2020 | 2020-01-02 00:00:00 | 2026-01-16 00:00:00 |     1511 | -0.00812674  | 0.0105694  | -0.768896   |   0.155526 |     -0.049785  |      0.0258679 | -0.00111695  |
| rolling   | gross    | pre_2008  | 1993-01-12 00:00:00 | 2007-12-31 00:00:00 |     3746 |  0.00641741  | 0.0288684  |  0.222298   |   0.225841 |     -0.117342  |      0.123184  | -0.00215513  |
| rolling   | gross    | post_2008 | 2008-01-02 00:00:00 | 2019-12-31 00:00:00 |     2995 | -0.00598417  | 0.0125796  | -0.475704   |   0.176628 |     -0.0774696 |      0.0383395 | -0.00140983  |
| rolling   | gross    | post_2020 | 2020-01-02 00:00:00 | 2026-01-16 00:00:00 |     1511 | -0.00742695  | 0.00851242 | -0.872484   |   0.147584 |     -0.0503282 |      0.0185607 | -0.000755967 |
| rolling   | net      | pre_2008  | 1993-01-12 00:00:00 | 2007-12-31 00:00:00 |     3746 |  0.00331317  | 0.028865   |  0.114782   |   0.224506 |     -0.120087  |      0.123184  | -0.0021568   |
| rolling   | net      | post_2008 | 2008-01-02 00:00:00 | 2019-12-31 00:00:00 |     2995 | -0.00695032  | 0.0125779  | -0.552584   |   0.175626 |     -0.0875425 |      0.0383395 | -0.00141208  |
| rolling   | net      | post_2020 | 2020-01-02 00:00:00 | 2026-01-16 00:00:00 |     1511 | -0.00789468  | 0.00850379 | -0.928372   |   0.146923 |     -0.0527539 |      0.0185607 | -0.000755967 |
+-----------+----------+-----------+---------------------+---------------------+----------+--------------+------------+-------------+------------+----------------+----------------+--------------+
Show code
# 8.5.1 Distribution of weights
# Distribution stats for weights and concentration diagnostics.
def weights_summary(weights: pd.DataFrame) -> pd.DataFrame:
    """Per-column weight distribution stats plus concentration diagnostics.

    Emits one row (mean/std/p05/p95) for each weight column, then three
    rows for the cross-sectional diagnostics: sum of |w|, Herfindahl
    index of |w| shares, and max |w| per day.
    """

    def _stats_row(label: str, values: pd.Series) -> dict:
        # NaN-safe summary of one series after dropping missing values.
        clean = values.dropna()
        if len(clean) == 0:
            return {"item": label, "mean": np.nan, "std": np.nan, "p05": np.nan, "p95": np.nan}
        return {
            "item": label,
            "mean": float(clean.mean()),
            "std": float(clean.std(ddof=0)),
            "p05": float(clean.quantile(0.05)),
            "p95": float(clean.quantile(0.95)),
        }

    rows = [_stats_row(col, weights[col]) for col in weights.columns]

    abs_w = weights.abs()
    total_abs = abs_w.sum(axis=1)
    # HHI on |weight| shares; days with zero total contribute 0 (via fillna).
    shares = abs_w.div(total_abs.replace(0, np.nan), axis=0).fillna(0)
    concentration = {
        "sum_abs": total_abs,
        "hhi_abs": (shares ** 2).sum(axis=1),
        "max_abs": abs_w.max(axis=1),
    }
    rows.extend(_stats_row(label, series) for label, series in concentration.items())

    return pd.DataFrame(rows)


# Align daily weights to the return-proxy calendar, summarize, and save.
weights_exp = bt_exp["weights"].reindex(duration_scaled_return_proxy.index)
weights_roll = bt_roll["weights"].reindex(duration_scaled_return_proxy.index)

weights_summary_exp = weights_summary(weights_exp)
weights_summary_roll = weights_summary(weights_roll)

weights_summary_exp.to_csv(section8_dir / "weights_summary_expanding.csv", index=False)
weights_summary_roll.to_csv(section8_dir / "weights_summary_rolling.csv", index=False)

def plot_weights_heatmap(weights: pd.DataFrame, title: str, out_path: Path) -> None:
    """Heatmap of mean absolute weights per macro era, saved to out_path.

    Uses the module-level ``eras`` list; silently returns when no era
    overlaps the weights index.
    """
    abs_weights = weights.abs()
    era_labels = []
    era_rows = []
    for era_name, era_start, era_end in eras:
        in_era = abs_weights.loc[(abs_weights.index >= era_start) & (abs_weights.index <= era_end)]
        if len(in_era) == 0:
            continue
        era_labels.append(era_name)
        era_rows.append(in_era.mean().values)
    if not era_rows:
        return
    grid = np.vstack(era_rows)
    fig, ax = plt.subplots()
    image = ax.imshow(grid, aspect="auto")
    ax.set_xticks(range(len(abs_weights.columns)))
    ax.set_xticklabels(abs_weights.columns, rotation=45, ha="right")
    ax.set_yticks(range(len(era_labels)))
    ax.set_yticklabels(era_labels)
    ax.set_title(title)
    fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.show()


# Render per-era weight-concentration heatmaps for both estimation modes.
plot_weights_heatmap(weights_exp, "Expanding abs weights by era", section8_dir / "weights_heatmap_expanding.png")
plot_weights_heatmap(weights_roll, "Rolling abs weights by era", section8_dir / "weights_heatmap_rolling.png")

Show code
# 8.5.2 Turnover
# Turnover time series and summary statistics.
# Restrict both modes to the common date index so the plot compares like-for-like.
turnover_exp_aligned = turnover_exp.reindex(idx_common) if turnover_exp is not None else None
turnover_roll_aligned = turnover_roll.reindex(idx_common) if turnover_roll is not None else None

fig, ax = plt.subplots()
if turnover_exp_aligned is not None:
    ax.plot(turnover_exp_aligned.index, turnover_exp_aligned.values, label="expanding")
    # 63-day (~one quarter) rolling mean smooths the daily spikes.
    ax.plot(
        turnover_exp_aligned.index,
        turnover_exp_aligned.rolling(63, min_periods=1).mean().values,
        label="expanding_63d",
    )
if turnover_roll_aligned is not None:
    ax.plot(turnover_roll_aligned.index, turnover_roll_aligned.values, label="rolling")
    ax.plot(
        turnover_roll_aligned.index,
        turnover_roll_aligned.rolling(63, min_periods=1).mean().values,
        label="rolling_63d",
    )
ax.set_title("Turnover time series")
ax.set_xlabel("date")
ax.set_ylabel("turnover")
ax.legend(ncol=2, fontsize=8)
fig.autofmt_xdate()
fig.savefig(section8_dir / "turnover_timeseries.png", dpi=150, bbox_inches="tight")
plt.show()

def summarize_turnover_stats(series: pd.Series | None) -> dict:
    if series is None:
        return {"mean": np.nan, "p95": np.nan, "p99": np.nan, "max": np.nan}
    s = series.dropna()
    if len(s) == 0:
        return {"mean": np.nan, "p95": np.nan, "p99": np.nan, "max": np.nan}
    return {
        "mean": float(s.mean()),
        "p95": float(s.quantile(0.95)),
        "p99": float(s.quantile(0.99)),
        "max": float(s.max()),
    }

# Compare refit-driven turnover (expanding_turnover / rolling_turnover, when
# left in scope by an earlier section) against strategy turnover from the backtest.
turnover_refit_vs_strategy = []
if "expanding_turnover" in locals():
    refit_stats = summarize_turnover_stats(expanding_turnover["turnover"])
    strat_stats = summarize_turnover_stats(turnover_exp)
    turnover_refit_vs_strategy.append(
        {
            "variant": "expanding",
            "turnover_refit_mean": refit_stats["mean"],
            "turnover_refit_p95": refit_stats["p95"],
            "turnover_refit_p99": refit_stats["p99"],
            "turnover_refit_max": refit_stats["max"],
            "turnover_strategy_mean": strat_stats["mean"],
            "turnover_strategy_p95": strat_stats["p95"],
            "turnover_strategy_p99": strat_stats["p99"],
            "turnover_strategy_max": strat_stats["max"],
        }
    )
if "rolling_turnover" in locals():
    refit_stats = summarize_turnover_stats(rolling_turnover["turnover"])
    strat_stats = summarize_turnover_stats(turnover_roll)
    turnover_refit_vs_strategy.append(
        {
            "variant": "rolling",
            "turnover_refit_mean": refit_stats["mean"],
            "turnover_refit_p95": refit_stats["p95"],
            "turnover_refit_p99": refit_stats["p99"],
            "turnover_refit_max": refit_stats["max"],
            "turnover_strategy_mean": strat_stats["mean"],
            "turnover_strategy_p95": strat_stats["p95"],
            "turnover_strategy_p99": strat_stats["p99"],
            "turnover_strategy_max": strat_stats["max"],
        }
    )
turnover_refit_vs_strategy_df = pd.DataFrame(turnover_refit_vs_strategy)
turnover_refit_vs_strategy_df.to_csv(
    section8_dir / "turnover_refit_vs_strategy.csv", index=False
)

# Turnover summary per mode, split into all-days vs position-active days.
rows_turn = []
for mode, series, bt_mode in [
    ("expanding", turnover_exp, bt_exp["bt"]),
    ("rolling", turnover_roll, bt_roll["bt"]),
]:
    if series is None:
        continue
    s = series.dropna()
    # Active days are those with a non-zero position_state, when available.
    active_mask = None
    if bt_mode is not None and "position_state" in bt_mode.columns:
        active_mask = bt_mode["position_state"] != 0
    mean_active = np.nan
    if active_mask is not None:
        active_turn = series.reindex(active_mask.index).where(active_mask).dropna()
        mean_active = float(active_turn.mean()) if len(active_turn) else np.nan
    rows_turn.append(
        {
            "variant": mode,
            "mean_turnover_all_days": float(s.mean()) if len(s) else np.nan,
            "mean_turnover_active_days": mean_active,
            "median": float(s.median()) if len(s) else np.nan,
            "p90": float(s.quantile(0.90)) if len(s) else np.nan,
            "max": float(s.max()) if len(s) else np.nan,
        }
    )

turnover_summary = pd.DataFrame(rows_turn)
write_df_csv_and_md(
    turnover_summary,
    section8_dir / "turnover_summary.csv",
    section8_dir / "turnover_summary.md",
)
show_table(turnover_summary)

+-----------+--------------------------+-----------------------------+----------+-------+---------+
| variant   |   mean_turnover_all_days |   mean_turnover_active_days |   median |   p90 |     max |
|-----------+--------------------------+-----------------------------+----------+-------+---------|
| expanding |                0.0630155 |                    0.080642 |        0 |     0 | 7.21174 |
| rolling   |                0.0732331 |                    0.096368 |        0 |     0 | 9.74916 |
+-----------+--------------------------+-----------------------------+----------+-------+---------+
Show code
# 8.5.3 Realized correlation of pnl with PC1 and PC2
# Rolling correlations with PC factor returns for exposure monitoring.
# PnL-based rolling corr can be NaN during flat periods (zero variance), so use
# a proxy return for hedge-leakage and keep a conditional PnL diagnostic.
def rolling_corr_always(series_x: pd.Series, f1: pd.Series, f2: pd.Series, window: int) -> pd.DataFrame:
    """Rolling correlations of series_x with two factor series.

    Correlations require a full window of observations, are NaN-ed when
    either rolling std is zero (flat windows), and are lagged one day so
    the diagnostic is ex-ante.
    """
    common = series_x.index.intersection(f1.index).intersection(f2.index)
    x_aligned = series_x.reindex(common)

    def _masked_lagged_corr(factor: pd.Series) -> pd.Series:
        # Full-window rolling corr, invalidated when either leg is flat, then lagged.
        f_aligned = factor.reindex(common)
        corr = x_aligned.rolling(window, min_periods=window).corr(f_aligned)
        std_x = x_aligned.rolling(window, min_periods=window).std(ddof=0)
        std_f = f_aligned.rolling(window, min_periods=window).std(ddof=0)
        return corr.where((std_x > 0) & (std_f > 0), np.nan).shift(1)

    return pd.DataFrame(
        {
            "date": common,
            "corr_pc1": _masked_lagged_corr(f1).values,
            "corr_pc2": _masked_lagged_corr(f2).values,
        }
    )


def rolling_corr_conditional(
    pnl: pd.Series,
    f1: pd.Series,
    f2: pd.Series,
    window: int,
    active_mask: pd.Series,
    min_active_obs: int,
) -> pd.DataFrame:
    """Lagged rolling correlations of PnL with two factors on active days only.

    Observations outside ``active_mask`` are blanked (NaN) before the rolling
    computation, so each window needs at least ``min_active_obs`` active
    points. Zero-variance windows are invalidated and results are lagged by
    one observation. Returns columns ``date``, ``corr_pc1``, ``corr_pc2``.
    """
    common = pnl.index.intersection(f1.index).intersection(f2.index)
    active = active_mask.reindex(common).fillna(False)

    pnl_active = pnl.reindex(common).where(active)
    sd_p = pnl_active.rolling(window, min_periods=min_active_obs).std(ddof=0)

    result = {"date": common}
    for col_name, factor in (("corr_pc1", f1), ("corr_pc2", f2)):
        f = factor.reindex(common).where(active)
        corr = pnl_active.rolling(window, min_periods=min_active_obs).corr(f)
        sd_f = f.rolling(window, min_periods=min_active_obs).std(ddof=0)
        # Undefined (flat) windows become NaN; shift keeps the result causal.
        result[col_name] = corr.where((sd_p > 0) & (sd_f > 0), np.nan).shift(1).values
    return pd.DataFrame(result)


f1_exp = bt_exp["f1"]
f2_exp = bt_exp["f2"]
f1_roll = bt_roll["f1"]
f2_roll = bt_roll["f2"]

# Daily butterfly weights aligned onto the return-proxy calendar.
weights_exp_daily = bt_exp["weights"].reindex(duration_scaled_return_proxy.index)
weights_roll_daily = bt_roll["weights"].reindex(duration_scaled_return_proxy.index)

# Proxy return = weighted sum of duration-scaled returns; min_count=1 keeps a
# row NaN only when every leg is missing.
proxy_ret_exp = (
    weights_exp_daily * duration_scaled_return_proxy.reindex(weights_exp_daily.index)
).sum(axis=1, min_count=1)
proxy_ret_roll = (
    weights_roll_daily * duration_scaled_return_proxy.reindex(weights_roll_daily.index)
).sum(axis=1, min_count=1)

# Active-day masks: prefer an explicit position_state column when the
# backtest frame has one; otherwise fall back to days with a PnL observation.
pos_state_exp = bt_exp["bt"]["position_state"] if "position_state" in bt_exp["bt"].columns else None
pos_state_roll = bt_roll["bt"]["position_state"] if "position_state" in bt_roll["bt"].columns else None
active_exp = (pos_state_exp.abs() > 0) if pos_state_exp is not None else pnl_gross_exp.notna()
active_roll = (pos_state_roll.abs() > 0) if pos_state_roll is not None else pnl_gross_roll.notna()

min_active_obs = int(backtest_spec["parameter_defaults"].get("min_active_obs", 20))

# Always-defined proxy correlations at 63-obs and 252-obs horizons.
proxy_corr63_exp = rolling_corr_always(proxy_ret_exp, f1_exp, f2_exp, 63)
proxy_corr63_roll = rolling_corr_always(proxy_ret_roll, f1_roll, f2_roll, 63)

proxy_corr252_exp = rolling_corr_always(proxy_ret_exp, f1_exp, f2_exp, 252)
proxy_corr252_roll = rolling_corr_always(proxy_ret_roll, f1_roll, f2_roll, 252)

# Conditional (active-days-only) PnL correlations at the same horizons.
cond_corr63_exp = rolling_corr_conditional(pnl_gross_exp, f1_exp, f2_exp, 63, active_exp, min_active_obs)
cond_corr63_roll = rolling_corr_conditional(pnl_gross_roll, f1_roll, f2_roll, 63, active_roll, min_active_obs)

cond_corr252_exp = rolling_corr_conditional(pnl_gross_exp, f1_exp, f2_exp, 252, active_exp, min_active_obs)
cond_corr252_roll = rolling_corr_conditional(pnl_gross_roll, f1_roll, f2_roll, 252, active_roll, min_active_obs)

# Join expanding vs rolling variants on date and persist monitoring CSVs.
proxy_corr63 = proxy_corr63_exp.merge(proxy_corr63_roll, on="date", suffixes=("_exp", "_roll"))
proxy_corr252 = proxy_corr252_exp.merge(proxy_corr252_roll, on="date", suffixes=("_exp", "_roll"))

cond_corr63 = cond_corr63_exp.merge(cond_corr63_roll, on="date", suffixes=("_exp", "_roll"))
cond_corr252 = cond_corr252_exp.merge(cond_corr252_roll, on="date", suffixes=("_exp", "_roll"))

proxy_corr63.to_csv(section8_dir / "pnl_pc_corr_rolling_63d.csv", index=False)
proxy_corr252.to_csv(section8_dir / "pnl_pc_corr_rolling_252d.csv", index=False)
cond_corr63.to_csv(section8_dir / "pnl_pc_corr_active_rolling_63d.csv", index=False)
cond_corr252.to_csv(section8_dir / "pnl_pc_corr_active_rolling_252d.csv", index=False)
Show code
# 8.5.4 PCA regression neutrality check
# Regress the realized butterfly proxy return on PCA factor returns to confirm:
# beta1 ~ 0, beta2 ~ 0, beta3 ~ 1 (by construction up to conditioning/freeze events).
# Output directories for documentation figures and tables (idempotent).
docs_figures_dir = Path("docs/figures")
docs_tables_dir = Path("docs/tables")
for _doc_dir in (docs_figures_dir, docs_tables_dir):
    _doc_dir.mkdir(parents=True, exist_ok=True)

def ols_with_intercept(y: pd.Series, X: pd.DataFrame) -> dict:
    """OLS of ``y`` on the three columns of ``X`` plus an intercept.

    Rows with any NaN are dropped. Returns an empty dict when fewer than 10
    complete observations remain; otherwise returns the intercept (``alpha``),
    the three slopes (``beta1``..``beta3``), ``r2``, ``n_obs``, and the fitted
    values as a Series (``y_hat``).
    """
    common = y.index.intersection(X.index)
    y_num = y.reindex(common).astype("float64")
    X_num = X.reindex(common).astype("float64")

    keep = y_num.notna() & X_num.notna().all(axis=1)
    y_num = y_num[keep]
    X_num = X_num.loc[keep]
    if len(y_num) < 10:
        return {}

    # Prepend a constant column and solve the least-squares problem.
    design = np.column_stack([np.ones(len(y_num)), X_num.values])
    coef, *_ = np.linalg.lstsq(design, y_num.values, rcond=None)

    fitted = design @ coef
    resid_ss = float(np.sum((y_num.values - fitted) ** 2))
    total_ss = float(np.sum((y_num.values - float(np.mean(y_num.values))) ** 2))
    r_squared = 1.0 - (resid_ss / total_ss) if total_ss > 0 else np.nan
    return {
        "alpha": float(coef[0]),
        "beta1": float(coef[1]),
        "beta2": float(coef[2]),
        "beta3": float(coef[3]),
        "r2": float(r_squared) if np.isfinite(r_squared) else np.nan,
        "n_obs": int(len(y_num)),
        "y_hat": pd.Series(fitted, index=y_num.index),
    }


rows_reg = []
scatter_by_mode = {}
betas_by_mode = {}
r2_by_mode = {}
# Per variant: regress the butterfly proxy return on the three PCA factor
# returns to check neutrality (beta1, beta2 ~ 0) and PC3 exposure (beta3 ~ 1).
for mode, bt in [("expanding", bt_exp), ("rolling", bt_roll)]:
    if not bt:
        continue
    # Daily weights on the proxy-return calendar; keep days with full weights.
    w = bt["weights"].reindex(duration_scaled_return_proxy.index).dropna(how="any")
    r = duration_scaled_return_proxy.reindex(w.index)
    # Center returns with the stored per-mode PCA daily means so the target
    # matches the basis the factors were fit in.
    means_path = derived_dir / f"pca_means_daily_{mode}.parquet"
    means_daily = pd.read_parquet(means_path).reindex(w.index)[tenors]
    r_centered = r[tenors] - means_daily
    y_bfly = (w[tenors] * r_centered).sum(axis=1, min_count=1)
    X = pd.DataFrame({"f1": bt["f1"], "f2": bt["f2"], "f3": bt["f3"]})
    # Keep a complete-case frame for the scatter plots below.
    df_scatter = pd.DataFrame(
        {
            "y": y_bfly,
            "f1": bt["f1"],
            "f2": bt["f2"],
            "f3": bt["f3"],
        }
    ).dropna()
    scatter_by_mode[mode] = df_scatter
    res = ols_with_intercept(y_bfly, X)
    if not res:
        continue
    rows_reg.append(
        {
            "mode": mode,
            "alpha": res["alpha"],
            "beta1": res["beta1"],
            "beta2": res["beta2"],
            "beta3": res["beta3"],
            "r2": res["r2"],
            "n_obs": res["n_obs"],
        }
    )
    betas_by_mode[mode] = [res["beta1"], res["beta2"], res["beta3"]]
    r2_by_mode[mode] = res["r2"]

# Persist and display the regression summary.
reg_summary = pd.DataFrame(rows_reg)
reg_csv = section8_dir / "pc_regression_summary.csv"
write_df_csv_and_md(reg_summary, reg_csv, section8_dir / "pc_regression_summary.md")
show_table(reg_summary)

# Best-effort symlink into docs/tables. Symlink creation can legitimately
# fail (e.g. Windows without privileges, filesystems without symlink
# support), and that should not abort the notebook.
tab_link = docs_tables_dir / "tab_pc_regression_summary.csv"
try:
    if tab_link.exists() or tab_link.is_symlink():
        tab_link.unlink()
    tab_link.symlink_to(Path("../../outputs/section_08/pc_regression_summary.csv"))
except (OSError, NotImplementedError):
    # Narrowed from a bare `except Exception` so real programming errors
    # (NameError, TypeError, ...) surface instead of being silently swallowed.
    pass

def save_scatter(scatter_by_mode: dict, factor_col: str, out_path: Path, title: str) -> None:
    """Scatter the butterfly proxy return against one PCA factor return.

    Pools every mode's observations to fix shared axis limits at the 1%/99%
    quantiles (converted to basis points), draws each mode's points within
    those limits, and annotates a per-mode OLS fit line with slope, R^2, and
    the number of excluded points. Saves the figure to ``out_path``.
    """
    fig, ax = plt.subplots()

    xs_all = []
    ys_all = []
    for mode, df in scatter_by_mode.items():
        if df is None or df.empty:
            continue
        xs_all.append(df[factor_col].astype("float64").values)
        ys_all.append(df["y"].astype("float64").values)
    if not xs_all:
        return

    # Convert to basis points for readable axes.
    x_all = np.concatenate(xs_all) * 10000.0
    y_all = np.concatenate(ys_all) * 10000.0

    # Guard: np.quantile raises on empty input when no values are finite.
    x_finite = x_all[np.isfinite(x_all)]
    y_finite = y_all[np.isfinite(y_all)]
    if x_finite.size == 0 or y_finite.size == 0:
        return
    x_lo, x_hi = np.quantile(x_finite, [0.01, 0.99])
    y_lo, y_hi = np.quantile(y_finite, [0.01, 0.99])

    for mode, df in scatter_by_mode.items():
        if df is None or df.empty:
            continue

        df_plot = df.copy()
        # Downsample very dense clouds for plot responsiveness (deterministic).
        if len(df_plot) > 8000:
            df_plot = df_plot.sample(8000, random_state=0)

        x = df_plot[factor_col].astype("float64").values * 10000.0
        y = df_plot["y"].astype("float64").values * 10000.0

        mask = np.isfinite(x) & np.isfinite(y)
        mask = mask & (x >= x_lo) & (x <= x_hi) & (y >= y_lo) & (y <= y_hi)

        kept = int(mask.sum())
        # Points excluded for being non-finite or outside the quantile box.
        # (A previous revision also computed an unused bitwise-AND of the two
        # finite counts here; that expression was meaningless and is removed.)
        clipped = int(len(x) - kept)

        ax.scatter(x[mask], y[mask], label=mode, alpha=0.2, s=10)

        if kept >= 50:
            x_fit = x[mask]
            y_fit = y[mask]
            # np.polyfit with deg=1 returns (slope, intercept).
            b, a = np.polyfit(x_fit, y_fit, deg=1)

            y_hat = a + b * x_fit
            sse = float(np.sum((y_fit - y_hat) ** 2))
            sst = float(np.sum((y_fit - np.mean(y_fit)) ** 2))
            r2 = 1.0 - sse / sst if sst > 0 else np.nan

            x_line = np.array([x_lo, x_hi], dtype="float64")
            y_line = a + b * x_line
            ax.plot(x_line, y_line, linewidth=2)

            # Stagger the annotation vertically per mode to avoid overlap.
            ax.text(
                0.02,
                0.98 - (0.08 if mode == "rolling" else 0.0),
                f"{mode}: slope={b:.3f}, R2={r2:.3f}, clipped={clipped}",
                transform=ax.transAxes,
                va="top",
                fontsize=9,
            )

    ax.set_title(title)
    ax.set_xlabel(f"{factor_col} factor return (bp)")
    ax.set_ylabel("butterfly return proxy (bp)")
    ax.set_xlim(float(x_lo), float(x_hi))
    ax.set_ylim(float(y_lo), float(y_hi))
    ax.axhline(0.0, linewidth=1, alpha=0.3)
    ax.axvline(0.0, linewidth=1, alpha=0.3)
    ax.legend()
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.show()

# One scatter per PCA factor, saved alongside the other section-8 outputs.
for factor_name, pc_label in (("f1", "PC1"), ("f2", "PC2"), ("f3", "PC3")):
    save_scatter(
        scatter_by_mode,
        factor_name,
        section8_dir / f"scatter_bfly_vs_{pc_label.lower()}.png",
        f"Butterfly return proxy versus {pc_label} factor return",
    )
+-----------+-------------+-------------+-------------+---------+-----------+---------+
| mode      |       alpha |       beta1 |       beta2 |   beta3 |        r2 |   n_obs |
|-----------+-------------+-------------+-------------+---------+-----------+---------|
| expanding | -1.0111e-05 | -0.0113066  | -0.0034671  | 1.22994 | 0.143153  |    8756 |
| rolling   |  8.0623e-06 |  0.00538996 |  0.00154407 | 1.08431 | 0.0934856 |    8252 |
+-----------+-------------+-------------+-------------+---------+-----------+---------+

Show code
# 8.6 Robustness grid
# Sensitivity sweep across PCA and z-score settings (rolling + expanding).
def build_refit_schedule_from_index(
    index: pd.DatetimeIndex, mode: str, window_obs: int, refit_step_obs: int
) -> list[tuple[pd.Timestamp, pd.Timestamp, pd.Timestamp]]:
    """Build (refit_date, window_start, window_end) triples for PCA refits.

    Expanding mode anchors every window at the first index date and requires
    252 observations of burn-in before the first fit; rolling mode uses the
    trailing ``window_obs`` observations. A refit occurs every
    ``refit_step_obs`` rows. Returns [] when the index is too short.
    """
    if mode == "expanding":
        first_pos = max(252 - 1, 0)
    else:
        first_pos = window_obs - 1
    if first_pos >= len(index):
        return []

    schedule = []
    for refit_date in index[first_pos::refit_step_obs]:
        if mode == "expanding":
            start = index[0]
        else:
            loc = index.get_loc(refit_date)
            start = index[loc - window_obs + 1]
        # Window end coincides with the refit date itself.
        schedule.append((refit_date, start, refit_date))
    return schedule


def run_backtest_config(
    mode: str,
    pca_window_obs: int,
    refit_step_obs: int,
    z_window_obs: int,
    entry_z: float,
    exit_z: float,
) -> dict:
    """Run one robustness-grid configuration and return summary metrics.

    Refits PCA-neutral butterfly weights on the given schedule, z-scores the
    cumulative residual level over ``z_window_obs``, trades it through the
    shared state machine, and summarizes gross/net performance. Returns {}
    when no refit window exists or the very first fit fails a gate (there is
    no previous weight vector to fall back on).
    """
    # Lightweight backtest for robustness grid (no file output).
    dv01 = duration_scaled_return_proxy.copy()
    windows = build_refit_schedule_from_index(dv01.index, mode, pca_window_obs, refit_step_obs)
    if not windows:
        return {}

    ridge = float(backtest_spec["parameter_defaults"].get("ridge", 1e-6))
    weight_rows = []
    prev_w = None
    leg_idx = [tenors.index(t) for t in butterfly_legs]
    for refit_date, window_start, window_end in windows:
        window = dv01.loc[window_start:window_end, tenors]
        X = window.values
        Xc = X - X.mean(axis=0)
        loadings, _, _ = pca_svd(Xc, k=3)
        loadings = align_signs(loadings, tenors)
        # Gate 1: conditioning of the leg-loading submatrix — reuse the
        # previous weights when the solve would be ill-posed.
        A_leg = np.asarray(loadings, dtype="float64")[:, leg_idx]
        cond_leg = float(np.linalg.cond(A_leg))
        if (not np.isfinite(cond_leg)) or (cond_leg > butterfly_max_cond):
            if prev_w is None:
                return {}
            w = prev_w.copy()
        else:
            w_candidate = solve_pca_neutral_butterfly_weights(loadings, tenors, butterfly_legs, ridge=ridge)
            # Gate 2: leg-size limits (L1 and max-abs) on the candidate weights.
            w_leg = w_candidate[leg_idx]
            l1_leg = float(np.abs(w_leg).sum())
            max_abs_leg = float(np.abs(w_leg).max())
            if (l1_leg > butterfly_max_l1) or (max_abs_leg > butterfly_max_abs):
                if prev_w is None:
                    return {}
                w = prev_w.copy()
            else:
                w = w_candidate
        prev_w = w.copy()
        for t_idx, tenor in enumerate(tenors):
            weight_rows.append({"refit_date": refit_date, "tenor": tenor, "weight": float(w[t_idx])})

    # Expand refit-date weights to a daily panel with the causality shift.
    weights_refit = pd.DataFrame(weight_rows)
    weights_wide = weights_refit.pivot(index="refit_date", columns="tenor", values="weight").sort_index()
    weights_wide = weights_wide.reindex(columns=tenors).fillna(0.0)
    weights_daily, _ = apply_causality_shift(weights_wide, dv01.index, "robust_weights")

    dv01_aligned = dv01.reindex(weights_daily.index)
    valid_weights = weights_daily[butterfly_legs].notna().all(axis=1)
    valid_returns = dv01_aligned[butterfly_legs].notna().all(axis=1)
    valid_mask = valid_weights & valid_returns

    # Signal: z-score of the cumulative residual level over z_window_obs.
    residual_return = (weights_daily * dv01_aligned).sum(axis=1, min_count=1)
    residual_level = residual_return.cumsum()
    mean_r = residual_level.rolling(z_window_obs, min_periods=z_window_obs).mean()
    std_r = residual_level.rolling(z_window_obs, min_periods=z_window_obs).std(ddof=0)
    z = (residual_level - mean_r) / std_r
    z = z.where(std_r > 0, np.nan)

    # Require the previous day's z to exist before a day is tradable.
    valid_mask = valid_mask & z.shift(1).notna()
    state = run_state_machine(
        z, entry_z, exit_z, int(backtest_spec["parameter_defaults"]["max_holding_obs"]), valid_mask
    )

    # Positions = weights scaled by the (-1/0/+1 style) state; PnL and
    # turnover are zeroed on invalid days.
    pos_vec = weights_daily.mul(state["state"], axis=0)
    pos_vec = pos_vec.where(state["state"] != 0, 0.0)
    pnl = (pos_vec * dv01.reindex(weights_daily.index)).sum(axis=1, min_count=1)
    pnl = pnl.where(valid_mask, 0.0)
    # Turnover: half the L1 change in positions (one-sided convention).
    turnover = 0.5 * pos_vec.sub(pos_vec.shift(1)).abs().sum(axis=1)
    turnover = turnover.where(valid_mask, 0.0)

    cost_per_turnover = float(backtest_spec["parameter_defaults"]["cost_per_turnover"])
    pnl_gross = pnl
    cost = turnover * cost_per_turnover
    pnl_net = pnl_gross - cost

    m_g = compute_summary(pnl_gross, turnover)
    m_n = compute_summary(pnl_net, turnover)
    return {
        "ann_ret_gross": m_g["ann_ret"],
        "ann_vol_gross": m_g["ann_vol"],
        "sharpe_gross": m_g["sharpe"],
        "max_dd_gross": m_g["max_drawdown"],
        "ann_ret_net": m_n["ann_ret"],
        "ann_vol_net": m_n["ann_vol"],
        "sharpe_net": m_n["sharpe"],
        "max_dd_net": m_n["max_drawdown"],
        "avg_turnover": m_g["avg_turnover"],
    }


# Grid definition for the sensitivity sweep.
rolling_pca_windows_obs = [252, 504]
refit_steps_obs = [63]
z_windows_obs = [63, 126, 252]
thresholds = [(1.5, 0.5), (2.0, 0.5), (2.0, 1.0)]

robust_rows = []

# Rolling variant: sweep PCA window x refit step x z window x thresholds.
# itertools.product iterates the rightmost factor fastest, matching the
# original nested-loop row order.
for pca_window_obs, refit_step_obs, z_window_obs, (entry_z, exit_z) in itertools.product(
    rolling_pca_windows_obs, refit_steps_obs, z_windows_obs, thresholds
):
    res = run_backtest_config(
        "rolling", pca_window_obs, refit_step_obs, z_window_obs, entry_z, exit_z
    )
    if not res:
        continue
    robust_rows.append(
        {
            "variant": "rolling",
            "pca_window_obs": pca_window_obs,
            "refit_step_obs": refit_step_obs,
            "z_window_obs": z_window_obs,
            "enter_z": entry_z,
            "exit_z": exit_z,
            **res,
        }
    )

# Expanding variant: PCA window pinned to the 252-observation burn-in.
for refit_step_obs, z_window_obs, (entry_z, exit_z) in itertools.product(
    refit_steps_obs, z_windows_obs, thresholds
):
    res = run_backtest_config(
        "expanding", 252, refit_step_obs, z_window_obs, entry_z, exit_z
    )
    if not res:
        continue
    robust_rows.append(
        {
            "variant": "expanding",
            "pca_window_obs": 252,
            "refit_step_obs": refit_step_obs,
            "z_window_obs": z_window_obs,
            "enter_z": entry_z,
            "exit_z": exit_z,
            **res,
        }
    )

robust_df = pd.DataFrame(robust_rows)
write_df_csv_and_md(
    robust_df,
    section8_dir / "robustness_results.csv",
    section8_dir / "robustness_results.md",
)

# Slice the grid down to the base rolling configuration for the heatmaps.
heat_slice = robust_df.loc[
    (robust_df["variant"] == "rolling")
    & (robust_df["pca_window_obs"] == rolling_pca_windows_obs[0])
    & (robust_df["refit_step_obs"] == refit_steps_obs[0])
]

if len(heat_slice):
    # Same layout for the gross and net Sharpe heatmaps.
    for metric, plot_title, file_name in (
        ("sharpe_gross", "Sharpe heatmap (rolling, gross)", "robustness_heatmap_sharpe_gross.png"),
        ("sharpe_net", "Sharpe heatmap (rolling, net)", "robustness_heatmap_sharpe_net.png"),
    ):
        pivot = heat_slice.pivot_table(
            index="z_window_obs", columns="enter_z", values=metric, aggfunc="mean"
        )
        fig, ax = plt.subplots()
        im = ax.imshow(pivot.values, aspect="auto")
        ax.set_xticks(range(len(pivot.columns)))
        ax.set_xticklabels(pivot.columns)
        ax.set_yticks(range(len(pivot.index)))
        ax.set_yticklabels(pivot.index)
        ax.set_title(plot_title)
        # Annotate each cell with its Sharpe value (blank when missing).
        for row in range(pivot.shape[0]):
            for col in range(pivot.shape[1]):
                cell = pivot.values[row, col]
                ax.text(
                    col, row, f"{cell:.2f}" if pd.notna(cell) else "",
                    ha="center", va="center", color="white",
                )
        fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
        fig.tight_layout()
        fig.savefig(section8_dir / file_name, dpi=150, bbox_inches="tight")
        plt.show()

Show code
# 8.7 Macro correlations and heatmap cell
# Correlate strategy PnL with macro series at daily/weekly frequency.
# Wide parquet with the macro series; fail fast when the data file is absent.
input_path = "data/combined/all_datasets_wide.parquet"
if not Path(input_path).exists():
    raise FileNotFoundError(input_path)

df_macro = pd.read_parquet(input_path)
# Promote a 'date' column to the index when present, then normalize to a
# timezone-naive, sorted DatetimeIndex.
if "date" in df_macro.columns:
    df_macro["date"] = pd.to_datetime(df_macro["date"], errors="coerce")
    df_macro = df_macro.set_index("date")
df_macro.index = pd.to_datetime(df_macro.index, errors="coerce")
df_macro.index = ensure_naive_dates(df_macro.index)
df_macro = df_macro.sort_index()

# Prefer net PnL when available; otherwise fall back to gross.
daily_pnl = pnl_net_exp if pnl_net_exp is not None and len(pnl_net_exp) else pnl_gross_exp

# Macro transforms: log returns for the FX series, first differences for RRP.
eurofx = df_macro["eurofx"].astype("float64")
eurofx_ret = np.log(eurofx).diff()
rrp = df_macro["rrp"].astype("float64")
rrp_chg = rrp.diff()

# Complete-case daily panel for the correlation matrices.
daily_df = pd.DataFrame(
    {
        "pnl": daily_pnl,
        "eurofx_ret": eurofx_ret,
        "rrp_dchange": rrp_chg,
    }
).dropna()

# Pearson (linear) and Spearman (rank) correlations on overlapping days.
pearson_daily = daily_df.corr(method="pearson")
spearman_daily = daily_df.corr(method="spearman")

pearson_daily.to_csv(section8_dir / "macro_corr_daily_pearson.csv")
spearman_daily.to_csv(section8_dir / "macro_corr_daily_spearman.csv")

def plot_corr_heatmap(corr: pd.DataFrame, title: str, out_path: Path) -> None:
    """Render a correlation matrix as an annotated heatmap and save it."""
    n_rows, n_cols = corr.shape
    fig, ax = plt.subplots()
    im = ax.imshow(corr.values, vmin=-1, vmax=1)
    ax.set_xticks(range(n_cols))
    ax.set_xticklabels(corr.columns, rotation=45, ha="right")
    ax.set_yticks(range(n_rows))
    ax.set_yticklabels(corr.index)
    ax.set_title(title)
    # Print each correlation value on its cell for readability.
    for row in range(n_rows):
        for col in range(n_cols):
            ax.text(col, row, f"{corr.values[row, col]:.2f}", ha="center", va="center", color="white")
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.show()


# Daily heatmaps, one per correlation method.
for corr_daily, method_label in ((pearson_daily, "Pearson"), (spearman_daily, "Spearman")):
    plot_corr_heatmap(
        corr_daily,
        f"Daily macro correlation ({method_label})",
        section8_dir / f"macro_corr_heatmap_daily_{method_label.lower()}.png",
    )

# Weekly aggregation: pnl/returns/changes are summed over the week, while
# fed_assets and tga are sampled at week end and then differenced.
weekly_rule = "W-FRI"
weekly_pnl = daily_pnl.resample(weekly_rule).sum()
weekly_eurofx = eurofx_ret.resample(weekly_rule).sum()
weekly_rrp = rrp_chg.resample(weekly_rule).sum()
fed_assets = df_macro["fed_assets"].astype("float64")
tga = df_macro["tga"].astype("float64")
weekly_fed_assets = fed_assets.resample(weekly_rule).last().diff()
weekly_tga = tga.resample(weekly_rule).last().diff()

# Complete-case weekly panel for the correlation matrices.
weekly_df = pd.DataFrame(
    {
        "weekly_pnl": weekly_pnl,
        "eurofx_weekly_ret": weekly_eurofx,
        "rrp_weekly_change": weekly_rrp,
        "fed_assets_weekly_change": weekly_fed_assets,
        "tga_weekly_change": weekly_tga,
    }
).dropna()

pearson_weekly = weekly_df.corr(method="pearson")
spearman_weekly = weekly_df.corr(method="spearman")

pearson_weekly.to_csv(section8_dir / "macro_corr_weekly_pearson.csv")
spearman_weekly.to_csv(section8_dir / "macro_corr_weekly_spearman.csv")

# Weekly heatmaps, mirroring the daily pair.
for corr_weekly, method_label in ((pearson_weekly, "Pearson"), (spearman_weekly, "Spearman")):
    plot_corr_heatmap(
        corr_weekly,
        f"Weekly macro correlation ({method_label}, {weekly_rule})",
        section8_dir / f"macro_corr_heatmap_weekly_{method_label.lower()}.png",
    )

Appendix: Data gap diagnostics

Show code
# A.1 Index coverage summary
idx_rows = []

def add_index_row(name: str, index: pd.DatetimeIndex | pd.Index) -> None:
    if index is None or len(index) == 0:
        idx_rows.append({"name": name, "start": None, "end": None, "n_obs": 0})
        return
    idx_rows.append({"name": name, "start": index.min(), "end": index.max(), "n_obs": int(len(index))})

# Record coverage for each relevant index.
add_index_row("raw_index", df_raw.index)
add_index_row("curve_index", df_curve.index)
add_index_row("yields_index", df_yields.index)
add_index_row("return_proxy_index", duration_scaled_return_proxy.index)
# Backtest dates intersected with the proxy calendar; expanding preferred,
# rolling used as the fallback when expanding is unavailable.
idx_bt = None
if bt_exp and "bt" in bt_exp:
    idx_bt = bt_exp["bt"].index.intersection(duration_scaled_return_proxy.index)
elif bt_roll and "bt" in bt_roll:
    idx_bt = bt_roll["bt"].index.intersection(duration_scaled_return_proxy.index)
add_index_row("idx_bt", idx_bt)
add_index_row("bt_expanding", bt_exp["bt"].index if bt_exp else None)
add_index_row("bt_rolling", bt_roll["bt"].index if bt_roll else None)

index_summary = pd.DataFrame(idx_rows)
show_table(index_summary)

# Dates present in the curve index but absent from the other indexes.
missing_in_raw = df_curve.index.difference(df_raw.index)
missing_in_return_proxy = df_curve.index.difference(duration_scaled_return_proxy.index)

missing_summary = pd.DataFrame(
    [
        {
            "item": "missing_in_raw",
            "n_missing": int(len(missing_in_raw)),
            "first": missing_in_raw.min() if len(missing_in_raw) else None,
            "last": missing_in_raw.max() if len(missing_in_raw) else None,
        },
        {
            "item": "missing_in_return_proxy",
            "n_missing": int(len(missing_in_return_proxy)),
            "first": missing_in_return_proxy.min() if len(missing_in_return_proxy) else None,
            "last": missing_in_return_proxy.max() if len(missing_in_return_proxy) else None,
        },
    ]
)
show_table(missing_summary)
+--------------------+---------------------+---------------------+---------+
| name               | start               | end                 |   n_obs |
|--------------------+---------------------+---------------------+---------|
| raw_index          | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    9017 |
| curve_index        | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    9017 |
| yields_index       | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    9017 |
| return_proxy_index | 1990-01-03 00:00:00 | 2026-01-16 00:00:00 |    9008 |
| idx_bt             | 1991-01-07 00:00:00 | 2026-01-16 00:00:00 |    8756 |
| bt_expanding       | 1991-01-07 00:00:00 | 2026-01-16 00:00:00 |    8756 |
| bt_rolling         | 1993-01-12 00:00:00 | 2026-01-16 00:00:00 |    8252 |
+--------------------+---------------------+---------------------+---------+
+-------------------------+-------------+---------------------+---------------------+
| item                    |   n_missing | first               | last                |
|-------------------------+-------------+---------------------+---------------------|
| missing_in_raw          |           0 | NaT                 | NaT                 |
| missing_in_return_proxy |           9 | 1990-01-02 00:00:00 | 2010-10-12 00:00:00 |
+-------------------------+-------------+---------------------+---------------------+
Show code
# A.2 Tenor availability and missing streaks
avail = df_yields.notna()  # boolean frame: True where a yield observation exists
def longest_missing_streak(mask: pd.Series) -> int:
    """Length of the longest consecutive run of False (missing) in ``mask``."""
    # Group consecutive equal values of the inverted mask; count only the
    # runs where the value is True (i.e. originally missing).
    missing_runs = (
        sum(1 for _ in group)
        for is_missing, group in itertools.groupby((~mask).values)
        if is_missing
    )
    return int(max(missing_runs, default=0))

# Per-tenor availability summary: first/last valid dates, missing fraction,
# and the longest consecutive gap.
rows_tenor = []
for col in avail.columns:
    flags = avail[col]
    valid_dates = flags[flags].index
    rows_tenor.append(
        {
            "tenor": col,
            "first_valid": valid_dates.min() if flags.any() else None,
            "last_valid": valid_dates.max() if flags.any() else None,
            "missing_frac": float((~flags).mean()),
            "longest_missing_streak": longest_missing_streak(flags),
        }
    )

tenor_missing_summary = pd.DataFrame(rows_tenor).sort_values("missing_frac", ascending=False)
show_table(tenor_missing_summary)

# Flag dates where several tenors are missing at once (threshold capped at 3,
# but never more than half the tenor count and never below 1).
missing_count = (~avail).sum(axis=1)
threshold = int(max(1, min(3, len(avail.columns) // 2)))
dates_many_missing = missing_count[missing_count >= threshold]
missing_dates_table = pd.DataFrame(
    {
        "date": dates_many_missing.index,
        "missing_tenors": dates_many_missing.values,
    }
).head(10)
show_table(missing_dates_table)

# Per-tenor missing-data segments (consecutive NaN runs) and worst offenders.
segments_rows = []
segments_top = []
for col in avail.columns:
    segs = compute_missing_streaks(df_yields[col], df_yields.index)
    if len(segs):
        segs = segs.assign(tenor=col)
        segments_top.append(segs)
        segments_rows.append(
            {
                "tenor": col,
                "n_missing_segments": int(len(segs)),
                "max_segment_days": int(segs["length_days"].max()),
                "max_segment_business_days": int(segs["length_business_days"].max()),
            }
        )
    else:
        # No gaps for this tenor: record an explicit zero row.
        segments_rows.append(
            {
                "tenor": col,
                "n_missing_segments": 0,
                "max_segment_days": 0,
                "max_segment_business_days": 0,
            }
        )

segments_summary = pd.DataFrame(segments_rows).sort_values("n_missing_segments", ascending=False)
show_table(segments_summary)

# Ten longest gaps (by calendar days) across all tenors.
if segments_top:
    all_segments = pd.concat(segments_top, ignore_index=True)
    worst_segments = all_segments.sort_values("length_days", ascending=False).head(10)
    show_table(worst_segments)

# Line chart: number of available tenors per date.
fig, ax = plt.subplots()
ax.plot(missing_count.index, (len(avail.columns) - missing_count).values)
ax.set_title("Available tenors over time")
ax.set_xlabel("date")
ax.set_ylabel("n_available")
fig.autofmt_xdate()
fig.savefig(section8_dir / "appendix_available_tenors.png", dpi=150, bbox_inches="tight")
plt.show()

# Heatmap: tenor-by-date availability (1 = observed, 0 = missing).
fig, ax = plt.subplots(figsize=(10, 4))
im = ax.imshow(avail.T.astype(int), aspect="auto", interpolation="nearest")
ax.set_title("Yield availability heatmap (1=available)")
ax.set_ylabel("tenor")
ax.set_xlabel("date index")
ax.set_yticks(range(len(avail.columns)))
ax.set_yticklabels(avail.columns)
fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
fig.tight_layout()
fig.savefig(section8_dir / "appendix_yield_availability_heatmap.png", dpi=150, bbox_inches="tight")
plt.show()
+---------+---------------------+---------------------+----------------+--------------------------+
| tenor   | first_valid         | last_valid          |   missing_frac |   longest_missing_streak |
|---------+---------------------+---------------------+----------------+--------------------------|
| 3_mo    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000443607 |                        1 |
| 6_mo    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 1_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 2_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 3_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 5_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 7_yr    | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
| 10_yr   | 1990-01-02 00:00:00 | 2026-01-16 00:00:00 |    0.000110902 |                        1 |
+---------+---------------------+---------------------+----------------+--------------------------+
+---------------------+------------------+
| date                |   missing_tenors |
|---------------------+------------------|
| 2010-10-11 00:00:00 |                8 |
+---------------------+------------------+
+---------+----------------------+--------------------+-----------------------------+
| tenor   |   n_missing_segments |   max_segment_days |   max_segment_business_days |
|---------+----------------------+--------------------+-----------------------------|
| 3_mo    |                    4 |                  1 |                           1 |
| 6_mo    |                    1 |                  1 |                           1 |
| 1_yr    |                    1 |                  1 |                           1 |
| 2_yr    |                    1 |                  1 |                           1 |
| 3_yr    |                    1 |                  1 |                           1 |
| 5_yr    |                    1 |                  1 |                           1 |
| 7_yr    |                    1 |                  1 |                           1 |
| 10_yr   |                    1 |                  1 |                           1 |
+---------+----------------------+--------------------+-----------------------------+
+---------------------+---------------------+---------------+------------------------+---------+
| segment_start       | segment_end         |   length_days |   length_business_days | tenor   |
|---------------------+---------------------+---------------+------------------------+---------|
| 2008-12-10 00:00:00 | 2008-12-10 00:00:00 |             1 |                      1 | 3_mo    |
| 2008-12-18 00:00:00 | 2008-12-18 00:00:00 |             1 |                      1 | 3_mo    |
| 2008-12-24 00:00:00 | 2008-12-24 00:00:00 |             1 |                      1 | 3_mo    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 3_mo    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 6_mo    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 1_yr    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 2_yr    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 3_yr    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 5_yr    |
| 2010-10-11 00:00:00 | 2010-10-11 00:00:00 |             1 |                      1 | 7_yr    |
+---------------------+---------------------+---------------+------------------------+---------+

Show code
# A.3 Dropna impact by era
# Full-sample counts: dates with a complete cross-section (no missing tenor)
# versus dates with at least one observed tenor.
n_all = int(df_yields.dropna(how="any").shape[0])
n_any = int(df_yields.dropna(how="all").shape[0])

era_rows = []
for era_name, start, end in eras:
    in_era = (df_yields.index >= start) & (df_yields.index <= end)
    sub = df_yields.loc[in_era]
    rows_any = int(sub.dropna(how="all").shape[0])
    rows_all = int(sub.dropna(how="any").shape[0])
    era_rows.append(
        {
            "era": era_name,
            "n_any": rows_any,
            "n_all": rows_all,
            # Dates that would be discarded by a strict "complete curve" filter.
            "lost_dates": rows_any - rows_all,
        }
    )

dropna_summary = pd.DataFrame(era_rows)
show_table(dropna_summary)
+-----------+---------+---------+--------------+
| era       |   n_any |   n_all |   lost_dates |
|-----------+---------+---------+--------------|
| pre_2008  |    4503 |    4503 |            0 |
| post_2008 |    3002 |    2999 |            3 |
| post_2020 |    1511 |    1511 |            0 |
+-----------+---------+---------+--------------+
Show code
# A.4 Candidate policies (exploratory only)
# Compare alternative data-availability policies against the strict
# "drop any date with a missing tenor" baseline. Exploratory only — neither
# policy changes the strategy actually backtested (see printed note below).
weights_exp = bt_exp["weights"] if bt_exp else None  # presumably per-date tenor weights from an earlier expanding-window backtest — verify upstream
if weights_exp is not None and len(weights_exp):
    # Policy 1: require non-null yields only on the tenors the strategy
    # actually trades, rather than a fully complete curve.
    tenor_set = weights_exp.columns.tolist()
    mask_all = df_yields.dropna(how="any").index  # baseline: dates with every tenor observed
    mask_policy1 = df_yields[tenor_set].notna().all(axis=1)
    # Extra tradable dates gained by relaxing to the traded-tenor subset.
    extra_days = int(mask_policy1.sum() - len(mask_all))
    policy1_summary = pd.DataFrame(
        [
            {
                "policy": "tenor_subset_only",
                "n_tradable_days": int(mask_policy1.sum()),
                "extra_days_vs_all": extra_days,
            }
        ]
    )
    show_table(policy1_summary)

    # Policy 2: each day, re-solve butterfly weights from the PCA loadings on
    # the butterfly legs only, and count how often the system is solvable.
    loadings_exp_path = derived_dir / "pca_loadings_daily_expanding.parquet"
    if loadings_exp_path.exists():
        loadings_exp = pd.read_parquet(loadings_exp_path)
        loadings_exp["trade_date"] = pd.to_datetime(loadings_exp["trade_date"], errors="coerce")
        # One wide (trade_date x tenor) loading matrix per principal component.
        pcs = {
            pc: loadings_exp.loc[loadings_exp["pc"] == pc]
            .pivot(index="trade_date", columns="tenor", values="loading")
            for pc in [1, 2, 3]
        }
        # Short fixed sample window keeps this feasibility check cheap.
        sample_start = pd.Timestamp("2007-01-01")
        sample_end = pd.Timestamp("2007-03-31")
        sample_idx = df_yields.index[(df_yields.index >= sample_start) & (df_yields.index <= sample_end)]
        solvable = 0
        for d in sample_idx:
            # Skip dates where any butterfly leg is unobserved.
            avail_cols = df_yields.loc[d].dropna().index.tolist()
            if not all(t in avail_cols for t in butterfly_legs):
                continue
            # NOTE(review): assumes every remaining date in sample_idx is
            # present in the pivoted loadings — a missing date would raise
            # KeyError here, not be skipped. Confirm against the loadings file.
            L1 = pcs[1].loc[d, butterfly_legs].values
            L2 = pcs[2].loc[d, butterfly_legs].values
            L3 = pcs[3].loc[d, butterfly_legs].values
            A = np.vstack([L1, L2, L3])  # (3, 3) on the butterfly legs
            b = np.array([0.0, 0.0, 1.0])
            gram = A @ A.T
            try:
                # Minimum-norm weights hitting PC targets (0, 0, 1); the ridge
                # term keeps the 3x3 Gram system well-conditioned, so the
                # except branch should rarely fire. Result discarded — only
                # solvability is counted.
                _ = A.T @ np.linalg.solve(gram + ridge * np.eye(3), b)
                solvable += 1
            except np.linalg.LinAlgError:
                continue

        policy2_summary = pd.DataFrame(
            [
                {
                    "policy": "resolve_on_butterfly_legs_only",
                    "sample_start": sample_start,
                    "sample_end": sample_end,
                    "n_days_sample": int(len(sample_idx)),
                    "n_days_solvable": int(solvable),
                }
            ]
        )
        show_table(policy2_summary)
        show_table(pd.DataFrame([{"note": "re-solving changes strategy definition"}]))
+-------------------+-------------------+---------------------+
| policy            |   n_tradable_days |   extra_days_vs_all |
|-------------------+-------------------+---------------------|
| tenor_subset_only |              9013 |                   0 |
+-------------------+-------------------+---------------------+
+--------------------------------+---------------------+---------------------+-----------------+-------------------+
| policy                         | sample_start        | sample_end          |   n_days_sample |   n_days_solvable |
|--------------------------------+---------------------+---------------------+-----------------+-------------------|
| resolve_on_butterfly_legs_only | 2007-01-01 00:00:00 | 2007-03-31 00:00:00 |              62 |                62 |
+--------------------------------+---------------------+---------------------+-----------------+-------------------+
+----------------------------------------+
| note                                   |
|----------------------------------------|
| re-solving changes strategy definition |
+----------------------------------------+

Appendix: Helpers

Show code
# small helpers used by diagnostics
def first_all_non_null_date(df_table, cols, index):
    """Return the first date in `index` (also present in `df_table.index`)
    on which every column in `cols` is non-null, or None if none exists."""
    common = index.intersection(df_table.index)
    if len(common) == 0:
        return None
    complete = df_table.loc[common, cols].notna().all(axis=1)
    if not complete.any():
        return None
    return complete[complete].index[0]