# ═══════════════════════════════════════════════════════════════════════════════

# CELL 0 — Environment, reproducibility, and version audit

# ═══════════════════════════════════════════════════════════════════════════════



import sys, os, random, time, warnings

from pathlib import Path

warnings.filterwarnings('ignore')



os.environ.setdefault('OMP_NUM_THREADS', '1')

os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')

os.environ.setdefault('MKL_NUM_THREADS', '1')



# ── Resolve project root: walk upward until a /src directory is found ─────────

proj_root = os.path.abspath('..')

if not os.path.isdir(os.path.join(proj_root, 'src')):

    p = os.getcwd()

    while True:

        if os.path.isdir(os.path.join(p, 'src')):

            proj_root = p

            break

        parent = os.path.dirname(p)

        if parent == p:

            proj_root = os.getcwd()

            break

        p = parent



if proj_root not in sys.path:

    sys.path.insert(0, proj_root)



active_conda_env = os.environ.get('CONDA_DEFAULT_ENV', '')

python_executable = Path(sys.executable).resolve()

if active_conda_env == 'qaoa':

    environment_label = 'conda:qaoa'

else:

    environment_label = f'python:{python_executable}'

    print('Warning: expected the qaoa conda environment.')

    print('The notebook will continue because the active kernel may already satisfy the dependencies.')



# ── Core scientific stack ──────────────────────────────────────────────────────

import numpy as np

import torch

import torch.nn.functional as F

import pandas as pd

import matplotlib

import matplotlib.pyplot as plt

import networkx as nx



# ── Project modules ────────────────────────────────────────────────────────────

from src.gnn import SimpleGCN, PYG_AVAILABLE

from src.notebook_export import export_notebook_html as export_notebook_html_artifact



# ── Global random seed (pins NumPy, PyTorch, and Python random) ───────────────

SEED = 42

random.seed(SEED)

np.random.seed(SEED)

torch.manual_seed(SEED)

torch.set_num_threads(max(1, min(4, os.cpu_count() or 1)))

if torch.cuda.is_available():

    torch.cuda.manual_seed_all(SEED)



# ── Matplotlib: publication-grade style ───────────────────────────────────────

plt.style.use('seaborn-v0_8-whitegrid')

plt.rcParams.update({

    'figure.dpi': 130,

    'axes.titlesize': 13,

    'axes.labelsize': 11,

    'axes.spines.top': False,

    'axes.spines.right': False,

    'legend.frameon': False,

    'font.family': 'DejaVu Sans',

    'lines.linewidth': 2.0,

    'xtick.labelsize': 9,

    'ytick.labelsize': 9,

})



# ── Output and export directories ─────────────────────────────────────────────

out_dir = os.path.join(proj_root, 'outputs')

os.makedirs(out_dir, exist_ok=True)

notebook_path = Path(proj_root) / 'notebooks' / 'quantum_ai_bio_combined.ipynb'

notebook_figure_dir = Path(proj_root) / 'notebooks' / 'figures'

html_output_dir = Path(proj_root) / 'website' / 'notebooks_html'

html_figure_dir = html_output_dir / 'figures'

for figure_dir in (notebook_figure_dir, html_figure_dir):

    figure_dir.mkdir(parents=True, exist_ok=True)





def save_notebook_figure(fig, figure_name, dpi=180):

    saved_paths = []

    for figure_path in (notebook_figure_dir / figure_name, html_figure_dir / figure_name):

        fig.savefig(figure_path, dpi=dpi, bbox_inches='tight')

        saved_paths.append(figure_path)

    print(f"Saved figure assets -> {saved_paths[0]}")

    print(f"                     -> {saved_paths[1]}")





def export_notebook_html(output_name='quantum_ai_bio_combined.html'):

    export_script = Path(proj_root) / 'scripts' / 'export_notebook_html.py'

    export_cmd = [

        sys.executable,

        str(export_script),

        str(notebook_path),

        '--output',

        output_name,

        '--output-dir',

        str(html_output_dir),

    ]

    print('export helper:')

    print('  ' + ' '.join(export_cmd))

    output_path = export_notebook_html_artifact(

        notebook_path=notebook_path,

        output_name=output_name,

        output_dir=html_output_dir,

    )

    print(f'Exported HTML: {output_path}')

    return output_path



# ── Hardware audit ────────────────────────────────────────────────────────────

device_str = 'cuda' if torch.cuda.is_available() else 'cpu'



# ── Print audit trail ─────────────────────────────────────────────────────────

sep = '=' * 72

print(sep)

print('  Hybrid Quantum-Classical Pipeline — Execution Audit')

print(sep)

print(f'  Project root     : {proj_root}')

print(f'  Environment      : {environment_label}')

print(f'  Python           : {sys.version.split()[0]}')

print(f'  NumPy            : {np.__version__}')

print(f'  PyTorch          : {torch.__version__}  ({device_str})')

print(f'  NetworkX         : {nx.__version__}')

# ═══════════════════════════════════════════════════════════════════════════════
# CELL 2 — Real transcriptomic benchmark setup (OpenML prostate cohort)
# ═══════════════════════════════════════════════════════════════════════════════

from pathlib import Path
import copy
import json
import math

import torch.optim as optim
from scipy.optimize import minimize
from sklearn.datasets import fetch_openml

QAOA_DEPTH = 2
LEGACY_DEPTH = 1
GENE_COUNT = 16
CACHE_DEFAULT_GENE_COUNT = 32
TARGET_EDGE_COUNT = 18
ADAPT_GRAPH_COUNT = 24
BENCHMARK_GRAPH_COUNT = 6
SUBSAMPLE_SIZE = 60

CACHE_PANEL_PATH = os.path.join(proj_root, "data", "prostate_top32_variance_panel.csv.gz")
CACHE_META_PATH = os.path.join(proj_root, "data", "prostate_top32_variance_panel_meta.json")
LEGACY_CACHE_PANEL_PATH = os.path.join(proj_root, "data", "prostate_top10_variance_panel.csv.gz")
LEGACY_CACHE_META_PATH = os.path.join(proj_root, "data", "prostate_top10_variance_panel_meta.json")
OUTPUT_GRAPH_PATH = os.path.join(proj_root, "outputs", "maxcut_graph.csv")
OUTPUT_ANGLES_PATH = os.path.join(proj_root, "outputs", "qaoa_classical_angles.csv")


def resolve_transcriptomic_cache_paths(preferred_panel_path=CACHE_PANEL_PATH, preferred_meta_path=CACHE_META_PATH):
    candidate_pairs = [(preferred_panel_path, preferred_meta_path)]
    if (preferred_panel_path, preferred_meta_path) != (LEGACY_CACHE_PANEL_PATH, LEGACY_CACHE_META_PATH):
        candidate_pairs.append((LEGACY_CACHE_PANEL_PATH, LEGACY_CACHE_META_PATH))

    for panel_path, meta_path in candidate_pairs:
        if os.path.exists(panel_path) and os.path.exists(meta_path):
            return panel_path, meta_path
    return preferred_panel_path, preferred_meta_path


def build_cut_diagonal(n, edges):
    cut_diagonal = np.zeros(2 ** n, dtype=np.float64)
    for state_index in range(2 ** n):
        bits = [(state_index >> bit) & 1 for bit in range(n)]
        cut_diagonal[state_index] = sum(bits[u] != bits[v] for u, v in edges)
    return cut_diagonal


def apply_rx_all(state, n, beta):
    rx = np.array(
        [
            [np.cos(beta), -1j * np.sin(beta)],
            [-1j * np.sin(beta), np.cos(beta)],
        ],
        dtype=np.complex128,
    )
    psi = state.reshape((2,) * n)
    for axis in range(n):
        psi = np.moveaxis(psi, axis, 0)
        psi = np.tensordot(rx, psi, axes=([1], [0]))
        psi = np.moveaxis(psi, 0, axis)
    return psi.reshape(-1)


def qaoa_state_fast(cut_diagonal, gammas, betas):
    num_states = cut_diagonal.shape[0]
    n_qubits = int(np.log2(num_states))
    state = np.ones(num_states, dtype=np.complex128) / np.sqrt(num_states)
    for gamma, beta in zip(gammas, betas):
        state = state * np.exp(-1j * gamma * cut_diagonal)
        state = apply_rx_all(state, n_qubits, beta)
    return state


def expected_cut_fast(cut_diagonal, state):
    return float(np.dot(cut_diagonal, np.abs(state) ** 2))


def brute_force_maxcut(n, edges):
    best_cut = -1
    best_mask = 0
    for mask in range(1, 2 ** n):
        cut_value = sum(1 for u, v in edges if ((mask >> u) & 1) != ((mask >> v) & 1))
        if cut_value > best_cut:
            best_cut = cut_value
            best_mask = mask
    return best_cut, best_mask


def normalize_angles(raw_angles, p):
    raw_angles = np.asarray(raw_angles, dtype=np.float64).reshape(-1)
    gammas = np.mod(raw_angles[:p], math.pi)
    betas = np.mod(raw_angles[p : 2 * p], math.pi / 2)
    return gammas, betas


def qaoa_value_for_angles(cut_diagonal, gammas, betas):
    state = qaoa_state_fast(cut_diagonal, gammas, betas)
    return expected_cut_fast(cut_diagonal, state), state


def classical_optimize_instance(instance, p, num_starts=8, maxiter=320, seed=0):
    cut_diagonal = instance["cut_diagonal"]
    rng = np.random.default_rng(seed)
    best = None

    def objective(x):
        gammas, betas = normalize_angles(x, p)
        value, _ = qaoa_value_for_angles(cut_diagonal, gammas, betas)
        return -value

    for _ in range(num_starts):
        x0 = np.concatenate(
            [
                rng.uniform(0.0, math.pi, size=p),
                rng.uniform(0.0, math.pi / 2, size=p),
            ]
        )
        result = minimize(
            objective,
            x0,
            method="Nelder-Mead",
            options={"maxiter": maxiter, "xatol": 1e-6, "fatol": 1e-6},
        )
        gammas, betas = normalize_angles(result.x, p)
        value, state = qaoa_value_for_angles(cut_diagonal, gammas, betas)
        candidate = {
            "gammas": gammas,
            "betas": betas,
            "value": value,
            "state": state,
            "nit": result.nit,
            "nfev": result.nfev,
            "success": bool(result.success),
            "raw_angles": np.concatenate([gammas, betas]),
        }
        if best is None or candidate["value"] > best["value"]:
            best = candidate
    return best


def predict_instance_with_gnn(instance, model, p):
    adjacency_tensor = torch.tensor(instance["adjacency"], dtype=torch.float32)
    feature_tensor = torch.tensor(instance["features"], dtype=torch.float32)

    with torch.no_grad():
        _ = model(feature_tensor, adjacency_tensor)

    start_time = time.perf_counter()
    with torch.no_grad():
        raw_output = model(feature_tensor, adjacency_tensor).view(-1).cpu().numpy()
    inference_time = time.perf_counter() - start_time

    gammas, betas = normalize_angles(raw_output, p)
    value, state = qaoa_value_for_angles(instance["cut_diagonal"], gammas, betas)
    return {
        "gammas": gammas,
        "betas": betas,
        "value": value,
        "state": state,
        "inference_time": inference_time,
        "raw_output": raw_output,
    }


def load_transcriptomic_panel(cache_panel_path=CACHE_PANEL_PATH, cache_meta_path=CACHE_META_PATH):
    selected_panel_path, selected_meta_path = resolve_transcriptomic_cache_paths(cache_panel_path, cache_meta_path)
    if os.path.exists(selected_panel_path):
        panel_df = pd.read_csv(selected_panel_path, compression="gzip")
        meta = {}
        if os.path.exists(selected_meta_path):
            with open(selected_meta_path, "r", encoding="utf-8") as handle:
                meta = json.load(handle)
        gene_table = pd.DataFrame(meta.get("gene_table", []))
        if not gene_table.empty and "rank" in gene_table.columns:
            gene_table = gene_table.sort_values("rank").reset_index(drop=True)
        if not gene_table.empty:
            gene_names = gene_table["gene"].tolist()[:GENE_COUNT]
        else:
            gene_names = [column for column in panel_df.columns if not column.startswith("__")][:GENE_COUNT]
        labels = pd.Series(panel_df.pop("__target__").astype(str), name=meta.get("label_name", "class"))
        sample_ids = panel_df.pop("__sample_id__").astype(str)
        panel_df.index = sample_ids
        labels.index = sample_ids
        meta["cache_panel_path"] = selected_panel_path
        meta["cache_meta_path"] = selected_meta_path
        meta["cached_gene_count"] = int(meta.get("top_gene_count", len([column for column in panel_df.columns])))
        return panel_df[gene_names], labels, gene_names, meta

    dataset = fetch_openml(name="prostate", version=1, as_frame=True)
    expression_frame_full = dataset.data.apply(pd.to_numeric, errors="coerce")
    expression_frame_full = expression_frame_full.loc[:, expression_frame_full.notna().all(axis=0)]
    labels = pd.Series(dataset.target, name=dataset.target_names[0] if dataset.target_names else "class")
    variances = expression_frame_full.var(axis=0).sort_values(ascending=False)
    gene_names = variances.head(max(GENE_COUNT, CACHE_DEFAULT_GENE_COUNT)).index.tolist()
    labels.index = expression_frame_full.index
    panel_meta = {
        "dataset_name": "Prostate",
        "cached_gene_count": len(gene_names),
        "cache_panel_path": "Direct OpenML fetch",
        "cache_meta_path": "Direct OpenML fetch",
    }
    return expression_frame_full.loc[:, gene_names[:GENE_COUNT]].copy(), labels.astype(str), gene_names[:GENE_COUNT], panel_meta


def build_gene_correlation_graph(expression_frame, gene_names, target_edge_count=TARGET_EDGE_COUNT):
    correlation_matrix = expression_frame[gene_names].corr().abs().fillna(0.0).copy()
    correlation_values = correlation_matrix.to_numpy(copy=True)
    np.fill_diagonal(correlation_values, 0.0)
    correlation_matrix.iloc[:, :] = correlation_values

    complete_graph = nx.Graph()
    for gene_index, gene_name in enumerate(gene_names):
        complete_graph.add_node(gene_index, gene=gene_name)
    for i, gene_i in enumerate(gene_names):
        for j in range(i + 1, len(gene_names)):
            gene_j = gene_names[j]
            complete_graph.add_edge(i, j, weight=float(correlation_matrix.loc[gene_i, gene_j]))

    spanning_tree = nx.maximum_spanning_tree(complete_graph, weight="weight")
    remaining_edges = sorted(
        (
            (u, v, data["weight"])
            for u, v, data in complete_graph.edges(data=True)
            if not spanning_tree.has_edge(u, v)
        ),
        key=lambda item: item[2],
        reverse=True,
    )

    graph = nx.Graph()
    for node_index, gene_name in enumerate(gene_names):
        graph.add_node(node_index, gene=gene_name)
    for u, v, data in spanning_tree.edges(data=True):
        graph.add_edge(u, v, weight=data["weight"])
    for u, v, weight in remaining_edges:
        if graph.number_of_edges() >= target_edge_count:
            break
        graph.add_edge(u, v, weight=weight)

    edge_rows = []
    for u, v, data in graph.edges(data=True):
        edge_rows.append(
            {
                "gene_u": graph.nodes[u]["gene"],
                "gene_v": graph.nodes[v]["gene"],
                "abs_correlation": data["weight"],
            }
        )
    edge_table = pd.DataFrame(edge_rows).sort_values("abs_correlation", ascending=False).reset_index(drop=True)
    return graph, correlation_matrix, edge_table


def stratified_subsample_indices(labels, sample_size, seed):
    labels = pd.Series(labels)
    label_counts = labels.value_counts().sort_index()
    desired = label_counts / label_counts.sum() * sample_size
    counts = np.floor(desired).astype(int)
    remainder = sample_size - int(counts.sum())
    if remainder > 0:
        fractional = (desired - counts).sort_values(ascending=False)
        for label in fractional.index[:remainder]:
            counts.loc[label] += 1

    rng = np.random.default_rng(seed)
    chosen = []
    for label, count in counts.items():
        label_indices = labels[labels == label].index.to_numpy()
        chosen.extend(rng.choice(label_indices, size=int(count), replace=False).tolist())
    rng.shuffle(chosen)
    return chosen


def create_graph_instance(graph_id, graph, sample_indices, labels, split_name):
    n = graph.number_of_nodes()
    edges = list(graph.edges())
    best_cut, best_mask = brute_force_maxcut(n, edges)
    adjacency = nx.to_numpy_array(graph, dtype=np.float64) + np.eye(n)
    features = adjacency.sum(axis=1, keepdims=True).astype(np.float32)
    return {
        "graph_id": graph_id,
        "split": split_name,
        "graph": graph,
        "n": n,
        "edges": edges,
        "edge_count": len(edges),
        "density": nx.density(graph),
        "adjacency": adjacency,
        "features": features,
        "cut_diagonal": build_cut_diagonal(n, edges),
        "best_cut": best_cut,
        "best_mask": best_mask,
        "sample_indices": sample_indices,
        "sample_count": len(sample_indices),
        "class_balance": pd.Series(labels.loc[sample_indices]).value_counts().sort_index().to_dict(),
        "gene_labels": [graph.nodes[node]["gene"] for node in graph.nodes()],
    }


def build_graph_split(expression_frame, labels, gene_names, target_edge_count, split_name, split_size, subsample_size, base_seed):
    instances = []
    for offset in range(split_size):
        graph_seed = base_seed + offset
        sample_indices = stratified_subsample_indices(labels, subsample_size, seed=graph_seed)
        subset_expression = expression_frame.loc[sample_indices]
        graph, corr_matrix, edge_table = build_gene_correlation_graph(subset_expression, gene_names, target_edge_count)
        instance = create_graph_instance(graph_seed, graph, sample_indices, labels, split_name=split_name)
        instance["correlation_matrix"] = corr_matrix
        instance["edge_table"] = edge_table
        instances.append(instance)
    return instances


def build_transcriptomic_benchmark(expression_frame, labels, gene_names):
    representative_graph, representative_corr, representative_edge_table = build_gene_correlation_graph(
        expression_frame,
        gene_names,
        TARGET_EDGE_COUNT,
    )
    representative = create_graph_instance(
        graph_id=0,
        graph=representative_graph,
        sample_indices=expression_frame.index.tolist(),
        labels=labels,
        split_name="representative",
    )
    representative["correlation_matrix"] = representative_corr
    representative["edge_table"] = representative_edge_table
    adaptation_graphs = build_graph_split(
        expression_frame,
        labels,
        gene_names,
        TARGET_EDGE_COUNT,
        split_name="adaptation",
        split_size=ADAPT_GRAPH_COUNT,
        subsample_size=SUBSAMPLE_SIZE,
        base_seed=200,
    )
    benchmark_graphs = build_graph_split(
        expression_frame,
        labels,
        gene_names,
        TARGET_EDGE_COUNT,
        split_name="benchmark",
        split_size=BENCHMARK_GRAPH_COUNT,
        subsample_size=SUBSAMPLE_SIZE,
        base_seed=42,
    )
    return {
        "expression_frame": expression_frame,
        "labels": labels,
        "gene_names": gene_names,
        "representative": representative,
        "adaptation_graphs": adaptation_graphs,
        "benchmark_graphs": benchmark_graphs,
    }


expression_frame, labels, gene_names, panel_meta = load_transcriptomic_panel()
benchmark_bundle = build_transcriptomic_benchmark(expression_frame, labels, gene_names)
representative_graph = benchmark_bundle["representative"]

classical_start = time.perf_counter()
classical_reference = classical_optimize_instance(representative_graph, p=QAOA_DEPTH, num_starts=8, maxiter=320, seed=SEED)
classical_reference["time_seconds"] = time.perf_counter() - classical_start
classical_reference["ratio_to_exact"] = classical_reference["value"] / representative_graph["best_cut"]
classical_reference["partition_bits"] = np.array(
    [(representative_graph["best_mask"] >> idx) & 1 for idx in range(representative_graph["n"])],
    dtype=int,
)

legacy_model_loaded = False
legacy_reference = None
legacy_model = None
try:
    legacy_model = SimpleGCN(in_feats=1, hidden=32, out_feats=2, p=LEGACY_DEPTH)
    legacy_state = torch.load(os.path.join(proj_root, "model.pt"), map_location="cpu")
    legacy_model.load_state_dict(legacy_state)
    legacy_model.eval()
    legacy_model_loaded = True
    legacy_reference = predict_instance_with_gnn(representative_graph, legacy_model, LEGACY_DEPTH)
    legacy_reference["ratio_to_exact"] = legacy_reference["value"] / representative_graph["best_cut"]
except Exception as exc:
    legacy_reference = {"error": str(exc)}

representative_graph["edge_table"].rename(
    columns={"gene_u": "source", "gene_v": "target", "abs_correlation": "weight"}
).to_csv(OUTPUT_GRAPH_PATH, index=False)

angle_row = {}
for idx, value in enumerate(classical_reference["gammas"], start=1):
    angle_row[f"gamma_{idx}"] = value
for idx, value in enumerate(classical_reference["betas"], start=1):
    angle_row[f"beta_{idx}"] = value
pd.DataFrame([angle_row]).to_csv(OUTPUT_ANGLES_PATH, index=False)

representative_summary = pd.DataFrame(
    [
        {
            "graph_family": "OpenML prostate transcriptomic cohort",
            "patients": int(expression_frame.shape[0]),
            "genes": len(gene_names),
            "retained_edges": representative_graph["edge_count"],
            "exact_maxcut": int(representative_graph["best_cut"]),
            "classical_depth2_cut": round(classical_reference["value"], 4),
            "classical_depth2_ratio": round(classical_reference["ratio_to_exact"], 4),
        }
    ]
)

print("Transcriptomic benchmark initialized.")
print(f"Cache source: {panel_meta.get('cache_panel_path', 'Direct OpenML fetch')}")
print(f"Cached ranking size: {panel_meta.get('cached_gene_count', len(gene_names))}")
print(f"Gene panel: {', '.join(gene_names)}")
display(representative_summary)
if legacy_model_loaded:
    print(
        f"Legacy depth-1 checkpoint loaded: representative ratio = {legacy_reference['ratio_to_exact']:.4f}"
    )
else:
    print("Legacy depth-1 checkpoint not loaded cleanly; the notebook will continue with transcriptomic adaptation.")

Transcriptomic benchmark initialized.
Cache source: /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/data/prostate_top32_variance_panel.csv.gz
Cached ranking size: 32
Gene panel: 1008_f_at, 1894_f_at, 1804_at, AFFX-hum_alu_at, 40794_at, 37407_s_at, 31962_at, 1288_s_at, 32466_at, 40887_g_at, 37746_r_at, 1805_g_at, 35905_s_at, 34593_g_at, 32755_at, 36931_at

Legacy depth-1 checkpoint loaded: representative ratio = 0.6349

def attach_classical_targets(instances, p, num_starts=8, maxiter=320, seed_offset=0):
    enriched_instances = []
    rows = []
    for index, instance in enumerate(instances):
        classical_start = time.perf_counter()
        reference = classical_optimize_instance(
            instance,
            p=p,
            num_starts=num_starts,
            maxiter=maxiter,
            seed=seed_offset + int(instance["graph_id"]) + index,
        )
        reference["time_seconds"] = time.perf_counter() - classical_start
        enriched = dict(instance)
        enriched["classical_reference"] = reference
        enriched["target_angles"] = np.concatenate([reference["gammas"], reference["betas"]]).astype(np.float32)
        enriched_instances.append(enriched)
        rows.append(
            {
                "graph_id": instance["graph_id"],
                "exact_maxcut": instance["best_cut"],
                "classical_ratio": reference["value"] / instance["best_cut"],
                "iterations": reference["nit"],
                "evaluations": reference["nfev"],
            }
        )
    return enriched_instances, pd.DataFrame(rows)


def train_adapted_qaoa_gnn(train_instances, p, hidden_dim=64, epochs=500, lr=5e-3, weight_decay=1e-4, patience=50, seed=7):
    torch.manual_seed(seed)
    np.random.seed(seed)

    trained_model = SimpleGCN(in_feats=1, hidden=hidden_dim, out_feats=2, p=p)
    optimizer = optim.Adam(trained_model.parameters(), lr=lr, weight_decay=weight_decay)

    best_state = copy.deepcopy(trained_model.state_dict())
    best_loss = float("inf")
    best_epoch = 0
    stale_epochs = 0
    loss_history = []

    for epoch in range(1, epochs + 1):
        trained_model.train()
        running_loss = 0.0
        for instance in train_instances:
            adjacency_tensor = torch.tensor(instance["adjacency"], dtype=torch.float32)
            feature_tensor = torch.tensor(instance["features"], dtype=torch.float32)
            target_tensor = torch.tensor(instance["target_angles"], dtype=torch.float32)

            prediction = trained_model(feature_tensor, adjacency_tensor).view(-1)
            loss = ((prediction - target_tensor) ** 2).mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += float(loss.item())

        mean_loss = running_loss / max(1, len(train_instances))
        loss_history.append(mean_loss)
        if mean_loss + 1e-8 < best_loss:
            best_loss = mean_loss
            best_epoch = epoch
            best_state = copy.deepcopy(trained_model.state_dict())
            stale_epochs = 0
        else:
            stale_epochs += 1

        if stale_epochs >= patience:
            break

    trained_model.load_state_dict(best_state)
    trained_model.eval()
    return {
        "model": trained_model,
        "history": loss_history,
        "best_loss": best_loss,
        "best_epoch": best_epoch,
        "epochs_run": len(loss_history),
    }


adaptation_instances, adaptation_target_summary = attach_classical_targets(
    benchmark_bundle["adaptation_graphs"],
    p=QAOA_DEPTH,
    num_starts=8,
    maxiter=320,
    seed_offset=1000,
)
adapted_training = train_adapted_qaoa_gnn(adaptation_instances, p=QAOA_DEPTH, hidden_dim=64, epochs=500, lr=5e-3)
adapted_qaoa_gnn = adapted_training["model"]
adaptation_loss_history = adapted_training["history"]

representative_adapted = predict_instance_with_gnn(representative_graph, adapted_qaoa_gnn, QAOA_DEPTH)
representative_adapted["ratio_to_exact"] = representative_adapted["value"] / representative_graph["best_cut"]
representative_adapted["retention_vs_classical"] = representative_adapted["value"] / classical_reference["value"]
representative_adapted["speedup_vs_classical"] = classical_reference["time_seconds"] / max(representative_adapted["inference_time"], 1e-9)

adaptation_summary = pd.DataFrame(
    [
        {
            "classical_ratio": round(classical_reference["ratio_to_exact"], 4),
            "adapted_ratio": round(representative_adapted["ratio_to_exact"], 4),
            "retention_vs_classical": round(representative_adapted["retention_vs_classical"], 4),
            "classical_seconds": round(classical_reference["time_seconds"], 4),
            "adaptive_gnn_seconds": round(representative_adapted["inference_time"], 6),
            "speedup_vs_classical": round(representative_adapted["speedup_vs_classical"], 1),
        }
    ]
)

print("Transcriptomic adaptation complete.")
display(adaptation_summary)
if legacy_model_loaded:
    print(
        f"Legacy depth-1 ratio: {legacy_reference['ratio_to_exact']:.4f} | "
        f"Adapted depth-2 ratio: {representative_adapted['ratio_to_exact']:.4f}"
    )

fig, ax = plt.subplots(figsize=(7, 3.5))
ax.plot(adaptation_loss_history, color="#0f766e", linewidth=2)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE loss")
ax.set_title("Transcriptomic adaptation loss curve")
ax.grid(alpha=0.25)
plt.tight_layout()
save_notebook_figure(fig, "combined_transcriptomic_adaptation.png")
plt.show()

Transcriptomic adaptation complete.

Legacy depth-1 ratio: 0.6349 | Adapted depth-2 ratio: 0.8469
Saved figure assets -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/notebooks/figures/combined_transcriptomic_adaptation.png
                     -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/website/notebooks_html/figures/combined_transcriptomic_adaptation.png

benchmark_instances, benchmark_target_summary = attach_classical_targets(
    benchmark_bundle["benchmark_graphs"],
    p=QAOA_DEPTH,
    num_starts=8,
    maxiter=320,
    seed_offset=2000,
)

benchmark_rows = []
for instance in benchmark_instances:
    adapted_prediction = predict_instance_with_gnn(instance, adapted_qaoa_gnn, QAOA_DEPTH)
    classical_ratio = instance["classical_reference"]["value"] / instance["best_cut"]
    adapted_ratio = adapted_prediction["value"] / instance["best_cut"]
    benchmark_rows.append(
        {
            "graph_id": instance["graph_id"],
            "classical_ratio": classical_ratio,
            "adapted_ratio": adapted_ratio,
            "retention_vs_classical": adapted_prediction["value"] / instance["classical_reference"]["value"],
            "classical_seconds": instance["classical_reference"]["time_seconds"],
            "adapted_seconds": adapted_prediction["inference_time"],
            "speedup": instance["classical_reference"]["time_seconds"] / max(adapted_prediction["inference_time"], 1e-9),
            "predicted_gammas": adapted_prediction["gammas"],
            "predicted_betas": adapted_prediction["betas"],
        }
    )
benchmark_df = pd.DataFrame(benchmark_rows)
benchmark_summary = {
    "mean_classical_ratio": float(benchmark_df["classical_ratio"].mean()),
    "mean_adapted_ratio": float(benchmark_df["adapted_ratio"].mean()),
    "mean_retention": float(benchmark_df["retention_vs_classical"].mean()),
    "median_speedup": float(benchmark_df["speedup"].median()),
}

representative_graph_nx = representative_graph["graph"]
partition_colors = ["#0f766e" if bit == 0 else "#b91c1c" for bit in classical_reference["partition_bits"]]
positions = nx.spring_layout(representative_graph_nx, seed=SEED, weight="weight")
edge_widths = [2.0 + 5.0 * representative_graph_nx[u][v]["weight"] for u, v in representative_graph_nx.edges()]

interpolation_grid = np.linspace(0.0, 1.0, 61)
interpolation_scores = []
for alpha in interpolation_grid:
    gammas = (1.0 - alpha) * representative_adapted["gammas"] + alpha * classical_reference["gammas"]
    betas = (1.0 - alpha) * representative_adapted["betas"] + alpha * classical_reference["betas"]
    value, _ = qaoa_value_for_angles(representative_graph["cut_diagonal"], gammas, betas)
    interpolation_scores.append(value / representative_graph["best_cut"])

slice_grid = np.linspace(max(0.0, classical_reference["gammas"][0] - 0.7), min(np.pi, classical_reference["gammas"][0] + 0.7), 81)
slice_scores = []
for gamma_1 in slice_grid:
    gammas = classical_reference["gammas"].copy()
    gammas[0] = gamma_1
    value, _ = qaoa_value_for_angles(representative_graph["cut_diagonal"], gammas, classical_reference["betas"])
    slice_scores.append(value / representative_graph["best_cut"])

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

nx.draw_networkx_nodes(representative_graph_nx, positions, node_color=partition_colors, node_size=950, ax=axes[0, 0])
nx.draw_networkx_labels(representative_graph_nx, positions, labels={node: representative_graph_nx.nodes[node]["gene"] for node in representative_graph_nx.nodes()}, font_size=8, font_weight="bold", ax=axes[0, 0])
nx.draw_networkx_edges(representative_graph_nx, positions, width=edge_widths, edge_color="#475569", alpha=0.75, ax=axes[0, 0])
axes[0, 0].set_title("Representative transcriptomic graph with exact MaxCut partition")
axes[0, 0].axis("off")

axes[0, 1].plot(interpolation_grid, interpolation_scores, color="#0f766e", linewidth=2.5)
axes[0, 1].scatter([0.0, 1.0], [interpolation_scores[0], interpolation_scores[-1]], color=["#b45309", "#1d4ed8"], s=70)
axes[0, 1].set_xlabel("Interpolation from adapted prediction to classical optimum")
axes[0, 1].set_ylabel("Cut ratio vs exact MaxCut")
axes[0, 1].set_title("Representative depth-2 interpolation path")
axes[0, 1].grid(alpha=0.25)

axes[1, 0].plot(slice_grid, slice_scores, color="#1d4ed8", linewidth=2.5)
axes[1, 0].axvline(classical_reference["gammas"][0], color="#0f766e", linestyle="--", linewidth=1.5, label="Classical optimum")
axes[1, 0].axvline(representative_adapted["gammas"][0], color="#b45309", linestyle=":", linewidth=1.8, label="Adapted prediction")
axes[1, 0].set_xlabel(r"$\gamma_1$")
axes[1, 0].set_ylabel("Cut ratio vs exact MaxCut")
axes[1, 0].set_title("Visible slice through the representative depth-2 landscape")
axes[1, 0].legend(frameon=False)
axes[1, 0].grid(alpha=0.25)

x_positions = np.arange(len(benchmark_df))
axes[1, 1].bar(x_positions - 0.18, benchmark_df["classical_ratio"], width=0.36, color="#cbd5e1", label="Classical depth-2")
axes[1, 1].bar(x_positions + 0.18, benchmark_df["adapted_ratio"], width=0.36, color="#0f766e", label="Adaptive Quantum GCN")
axes[1, 1].set_xticks(x_positions)
axes[1, 1].set_xticklabels([f"G{i}" for i in range(1, len(benchmark_df) + 1)])
axes[1, 1].set_ylim(0.0, 1.05)
axes[1, 1].set_ylabel("Approximation ratio")
axes[1, 1].set_title("Held-out transcriptomic benchmark")
axes[1, 1].legend(frameon=False)
axes[1, 1].grid(alpha=0.25, axis="y")

plt.tight_layout()
save_notebook_figure(fig, "combined_transcriptomic_benchmark.png")
plt.show()

benchmark_display = benchmark_df.drop(columns=["predicted_gammas", "predicted_betas"]).copy()
for column in ["classical_ratio", "adapted_ratio", "retention_vs_classical", "classical_seconds", "adapted_seconds", "speedup"]:
    benchmark_display[column] = benchmark_display[column].map(lambda value: round(float(value), 4))
display(benchmark_display)
print(
    "Held-out transcriptomic summary: "
    f"classical mean ratio = {benchmark_summary['mean_classical_ratio']:.4f}, "
    f"adapted mean ratio = {benchmark_summary['mean_adapted_ratio']:.4f}, "
    f"quality retention = {benchmark_summary['mean_retention']:.4f}, "
    f"median speedup = {benchmark_summary['median_speedup']:.1f}x"
)

Saved figure assets -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/notebooks/figures/combined_transcriptomic_benchmark.png
                     -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/website/notebooks_html/figures/combined_transcriptomic_benchmark.png

Held-out transcriptomic summary: classical mean ratio = 0.8628, adapted mean ratio = 0.8599, quality retention = 0.9967, median speedup = 536388.1x

# ═══════════════════════════════════════════════════════════════════════════════
# CELL 4 — CTG cohort loading, stratified split, graph construction
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import kneighbors_graph
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo
import torch.nn as nn
import torch.nn.functional as F

ctg = fetch_ucirepo(id=193)
features_df = ctg.data.features.copy()
targets_df = ctg.data.targets.copy()

X_raw = features_df.astype(np.float32).to_numpy()
feature_names = np.array(features_df.columns)
nsp = targets_df["NSP"].astype(int).to_numpy()

y = (nsp == 3).astype(np.int64)
target_names = np.array(["non-pathologic", "pathologic"])
state_3class = pd.Series(nsp).map({1: "normal", 2: "suspect", 3: "pathologic"}).to_numpy()
risk_text = target_names[y]
case_ids = np.array([f"CTG_{i:04d}" for i in range(len(y))])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("═" * 72)
print("  UCI Cardiotocography Cohort — Summary")
print("═" * 72)
print(f"  Total exams       : {len(y)}")
print(f"  Features          : {X_raw.shape[1]}  ({', '.join(feature_names[:4].tolist())}, ...)")
print(f"  NSP=1 Normal      : {(nsp == 1).sum():4d}  ({(nsp == 1).mean() * 100:.1f}%)")
print(f"  NSP=2 Suspect     : {(nsp == 2).sum():4d}  ({(nsp == 2).mean() * 100:.1f}%)")
print(f"  NSP=3 Pathologic  : {(nsp == 3).sum():4d}  ({(nsp == 3).mean() * 100:.1f}%)")
print(f"  Binary non-path   : {(y == 0).sum():4d}  ({(y == 0).mean() * 100:.1f}%)")
print(f"  Binary pathologic : {(y == 1).sum():4d}  ({(y == 1).mean() * 100:.1f}%)")

all_idx = np.arange(len(y))
train_pool_idx_np, test_idx_np = train_test_split(
    all_idx,
    test_size=0.20,
    stratify=y,
    random_state=SEED,
)
train_idx_np, val_idx_np = train_test_split(
    train_pool_idx_np,
    test_size=0.15,
    stratify=y[train_pool_idx_np],
    random_state=SEED,
)

print()
print("─" * 72)
print("  Stratified Split (binary pathologic vs non-pathologic)")
print("─" * 72)
for name, idx in [("Train", train_idx_np), ("Validation", val_idx_np), ("Test", test_idx_np)]:
    n0 = int((y[idx] == 0).sum())
    n1 = int((y[idx] == 1).sum())
    print(f"  {name:12s}: {len(idx):4d} exams  (non-path={n0}, path={n1}, path-prev={n1 / len(idx) * 100:.1f}%)")

scaler = StandardScaler()
scaler.fit(X_raw[train_idx_np])
X_std = scaler.transform(X_raw).astype(np.float32)

print()
print(f"  Train post-scale mean : {X_std[train_idx_np].mean():.6f}  (should be ≈ 0)")
print(f"  Train post-scale std  : {X_std[train_idx_np].std():.6f}   (should be ≈ 1)")

split_labels = np.full(len(y), "train", dtype=object)
split_labels[val_idx_np] = "validation"
split_labels[test_idx_np] = "test"

raw_df = features_df.copy()
raw_df.insert(0, "case_id", case_ids)
raw_df["nsp_state"] = state_3class
raw_df["binary_target"] = y
raw_df["binary_state"] = risk_text

proc_df = pd.DataFrame(X_std, columns=feature_names)
proc_df.insert(0, "case_id", case_ids)
proc_df["nsp_state"] = state_3class
proc_df["binary_target"] = y
proc_df["binary_state"] = risk_text
proc_df["split"] = split_labels

raw_path = os.path.join(out_dir, "ctg_raw.csv")
proc_path = os.path.join(out_dir, "ctg_processed.csv")
raw_df.to_csv(raw_path, index=False)
proc_df.to_csv(proc_path, index=False)
print(f"\n  ✓ Saved raw cohort       : {raw_path}")
print(f"  ✓ Saved processed cohort : {proc_path}")

train_path = X_std[train_idx_np][y[train_idx_np] == 1]
train_nonp = X_std[train_idx_np][y[train_idx_np] == 0]
mean_gap = train_path.mean(axis=0) - train_nonp.mean(axis=0)
top10_idx = np.argsort(np.abs(mean_gap))[-10:][::-1]

feature_gap_df = pd.DataFrame(
    {
        "feature": feature_names[top10_idx],
        "pathologic_minus_non_pathologic": mean_gap[top10_idx],
    }
)
print()
print("─" * 72)
print("  Top-10 Feature Shifts (Training Cohort: pathologic − non-pathologic)")
print("─" * 72)
for row in feature_gap_df.itertuples(index=False):
    direction = "↑ pathologic" if row.pathologic_minus_non_pathologic > 0 else "↑ non-pathologic"
    bar = "█" * int(abs(row.pathologic_minus_non_pathologic) * 5)
    print(f"  {row.feature:12s}  {row.pathologic_minus_non_pathologic:+.3f}  {bar}  ({direction})")

k_neighbors = 15
A_sparse = kneighbors_graph(
    X_std,
    n_neighbors=k_neighbors,
    mode="connectivity",
    include_self=False,
)
A_knn = A_sparse.maximum(A_sparse.T).toarray().astype(np.float32)
A_knn += np.eye(A_knn.shape[0], dtype=np.float32)

deg = A_knn.sum(axis=1)
d_inv_sqr = 1.0 / np.sqrt(np.clip(deg, 1.0, None))
A_norm = d_inv_sqr[:, None] * A_knn * d_inv_sqr[None, :]

Xt = torch.tensor(X_std, dtype=torch.float32, device=device)
At = torch.tensor(A_norm, dtype=torch.float32, device=device)
yt = torch.tensor(y, dtype=torch.long, device=device)
train_idx_t = torch.tensor(train_idx_np, dtype=torch.long, device=device)
val_idx_t = torch.tensor(val_idx_np, dtype=torch.long, device=device)
test_idx_t = torch.tensor(test_idx_np, dtype=torch.long, device=device)

n_samples = X_std.shape[0]
n_edges = int((A_knn.sum() - n_samples) / 2)
avg_deg = float((A_knn.sum(axis=1) - 1).mean())
density = n_edges / (n_samples * (n_samples - 1) / 2)

print()
print("─" * 72)
print(f"  Exam-Similarity Graph  (k={k_neighbors} NN, symmetric, self-loops added)")
print("─" * 72)
print(f"  Nodes (exams)      : {n_samples:,}")
print(f"  Edges (undirected) : {n_edges:,}")
print(f"  Average degree     : {avg_deg:.2f}")
print(f"  Graph density      : {density * 100:.4f}%  (sparse by design)")
print(f"  Device             : {device}")

════════════════════════════════════════════════════════════════════════
  UCI Cardiotocography Cohort — Summary
════════════════════════════════════════════════════════════════════════
  Total exams       : 2126
  Features          : 21  (LB, AC, FM, UC, ...)
  NSP=1 Normal      : 1655  (77.8%)
  NSP=2 Suspect     :  295  (13.9%)
  NSP=3 Pathologic  :  176  (8.3%)
  Binary non-path   : 1950  (91.7%)
  Binary pathologic :  176  (8.3%)

────────────────────────────────────────────────────────────────────────
  Stratified Split (binary pathologic vs non-pathologic)
────────────────────────────────────────────────────────────────────────
  Train       : 1445 exams  (non-path=1325, path=120, path-prev=8.3%)
  Validation  :  255 exams  (non-path=234, path=21, path-prev=8.2%)
  Test        :  426 exams  (non-path=391, path=35, path-prev=8.2%)

  Train post-scale mean : 0.000000  (should be ≈ 0)
  Train post-scale std  : 1.000000   (should be ≈ 1)

  ✓ Saved raw cohort       : /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/outputs/ctg_raw.csv
  ✓ Saved processed cohort : /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/outputs/ctg_processed.csv

────────────────────────────────────────────────────────────────────────
  Top-10 Feature Shifts (Training Cohort: pathologic − non-pathologic)
────────────────────────────────────────────────────────────────────────
  DP            +2.183  ██████████  (↑ pathologic)
  Mean          -1.611  ████████  (↑ non-pathologic)
  Mode          -1.582  ███████  (↑ non-pathologic)
  Median        -1.471  ███████  (↑ non-pathologic)
  Variance      +1.232  ██████  (↑ pathologic)
  ASTV          +1.100  █████  (↑ pathologic)
  MLTV          -0.945  ████  (↑ non-pathologic)
  AC            -0.768  ███  (↑ non-pathologic)
  Tendency      -0.740  ███  (↑ non-pathologic)
  DL            +0.698  ███  (↑ pathologic)

────────────────────────────────────────────────────────────────────────
  Exam-Similarity Graph  (k=15 NN, symmetric, self-loops added)
────────────────────────────────────────────────────────────────────────
  Nodes (exams)      : 2,126
  Edges (undirected) : 22,232
  Average degree     : 20.91
  Graph density      : 0.9842%  (sparse by design)
  Device             : cpu

# ═══════════════════════════════════════════════════════════════════════════════
# CELL 5 — ResidualClinicalGCN, weighted training, dual operating-point selection
# ═══════════════════════════════════════════════════════════════════════════════

from copy import deepcopy
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, average_precision_score, matthews_corrcoef, confusion_matrix

class ResidualClinicalGCN(nn.Module):
    def __init__(
        self,
        in_features: int,
        hidden_dim: int,
        num_classes: int,
        dropout: float = 0.15,
        residual_scale: float = 0.35,
    ):
        super().__init__()
        self.input_proj = nn.Linear(in_features, hidden_dim, bias=False)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 3, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes),
        )
        self.dropout = nn.Dropout(dropout)
        self.residual_scale = residual_scale

    def forward(self, x: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        h0 = F.relu(self.input_proj(x))
        h1 = F.relu(self.fc1(adj @ h0))
        h1 = self.dropout(h1)
        h2 = F.relu(self.fc2(adj @ h1))
        h2 = self.dropout(h2)
        h = h2 + self.residual_scale * h0
        h3 = adj @ h
        return self.classifier(torch.cat([h0, h, h3], dim=1))


def summarize_operating_point(y_true: np.ndarray, prob_pos: np.ndarray, threshold: float) -> dict:
    pred = (prob_pos >= threshold).astype(np.int64)
    tn, fp, fn, tp = confusion_matrix(y_true, pred, labels=[0, 1]).ravel()
    return {
        "threshold": float(threshold),
        "accuracy": accuracy_score(y_true, pred),
        "balanced_acc": balanced_accuracy_score(y_true, pred),
        "mcc": matthews_corrcoef(y_true, pred),
        "auc": roc_auc_score(y_true, prob_pos),
        "auprc": average_precision_score(y_true, prob_pos),
        "recall": tp / max(tp + fn, 1),
        "specificity": tn / max(tn + fp, 1),
        "precision": tp / max(tp + fp, 1),
        "npv": tn / max(tn + fn, 1),
        "tn": int(tn),
        "fp": int(fp),
        "fn": int(fn),
        "tp": int(tp),
        "pred": pred,
    }


model_hidden_dim = 64
model_dropout = 0.15
model_residual_scale = 0.35
recall_target = 0.90
presentation_mode = "balanced"

train_class_counts = np.bincount(y[train_idx_np], minlength=2).astype(np.float32)
class_weights = train_class_counts.sum() / (len(train_class_counts) * np.maximum(train_class_counts, 1))
class_weights[1] *= 1.15
class_weights_t = torch.as_tensor(class_weights, dtype=torch.float32, device=device)

bio_model = ResidualClinicalGCN(
    in_features=X_std.shape[1],
    hidden_dim=model_hidden_dim,
    num_classes=2,
    dropout=model_dropout,
    residual_scale=model_residual_scale,
).to(device)
optimizer = torch.optim.AdamW(bio_model.parameters(), lr=3e-3, weight_decay=5e-4)

n_bio_params = sum(p.numel() for p in bio_model.parameters() if p.requires_grad)

print("═" * 72)
print("  ResidualClinicalGCN — Architecture and Training Configuration")
print("═" * 72)
print(bio_model)
print(f"\nTrainable parameters : {n_bio_params:,}")
print(f"Hidden width         : {model_hidden_dim}")
print(f"Dropout              : {model_dropout:.2f}")
print(f"Residual scale       : {model_residual_scale:.2f}")
print(f"Class weight[0]      : {class_weights[0]:.4f}  (non-pathologic)")
print(f"Class weight[1]      : {class_weights[1]:.4f}  (pathologic)")
print(f"Loss weighting ratio : {class_weights[1] / class_weights[0]:.1f}x")
print(f"Validation recall target for recall-first mode : {recall_target:.2f}")
print(f"Presentation mode    : {presentation_mode}")

max_epochs = 180
patience = 30
best_state = None
best_epoch = 0
best_val_loss = float("inf")
best_val_metric = (-1.0, -1.0, -1.0)
epochs_no_improve = 0
stop_epoch = max_epochs

history = {
    "epoch": [],
    "train_loss": [],
    "val_loss": [],
    "train_acc": [],
    "val_acc": [],
    "val_bal_acc": [],
}

print()
print("Training progress")
print("-" * 72)
for epoch in range(1, max_epochs + 1):
    bio_model.train()
    optimizer.zero_grad()

    logits = bio_model(Xt, At)
    train_loss = F.cross_entropy(
        logits[train_idx_t],
        yt[train_idx_t],
        weight=class_weights_t,
        label_smoothing=0.02,
    )
    train_loss.backward()
    torch.nn.utils.clip_grad_norm_(bio_model.parameters(), max_norm=2.0)
    optimizer.step()

    bio_model.eval()
    with torch.no_grad():
        logits_eval = bio_model(Xt, At)
        val_logits = logits_eval[val_idx_t]
        val_loss = F.cross_entropy(val_logits, yt[val_idx_t], weight=class_weights_t).item()
        train_preds = np.argmax(logits_eval[train_idx_t].cpu().numpy(), axis=1)
        train_acc = accuracy_score(y[train_idx_np], train_preds)
        val_probs = F.softmax(val_logits, dim=1).cpu().numpy()[:, 1]

    epoch_best_row = None
    epoch_best_metric = (-1.0, -1.0, -1.0, -1.0, -1.0)
    for threshold in np.arange(0.20, 0.81, 0.01):
        metrics = summarize_operating_point(y[val_idx_np], val_probs, threshold)
        metric = (
            metrics["balanced_acc"],
            metrics["accuracy"],
            metrics["recall"],
            metrics["precision"],
            -abs(threshold - 0.5),
        )
        if metric > epoch_best_metric:
            epoch_best_metric = metric
            epoch_best_row = metrics

    history["epoch"].append(epoch)
    history["train_loss"].append(float(train_loss.item()))
    history["val_loss"].append(float(val_loss))
    history["train_acc"].append(float(train_acc))
    history["val_acc"].append(float(epoch_best_row["accuracy"]))
    history["val_bal_acc"].append(float(epoch_best_row["balanced_acc"]))

    checkpoint_metric = (
        epoch_best_row["balanced_acc"],
        epoch_best_row["accuracy"],
        -val_loss,
    )
    if checkpoint_metric > best_val_metric:
        best_val_metric = checkpoint_metric
        best_val_loss = float(val_loss)
        best_epoch = epoch
        best_state = deepcopy(bio_model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epoch == 1 or epoch % 10 == 0:
        print(
            f"Epoch {epoch:3d} | loss={train_loss.item():.4f} | "
            f"train_acc={train_acc:.3f} | val_acc={epoch_best_row['accuracy']:.3f} | "
            f"val_bal_acc={epoch_best_row['balanced_acc']:.3f} | val_loss={val_loss:.4f}"
        )

    if epochs_no_improve >= patience:
        print(
            f"Early stopping at epoch {epoch} "
            f"(best checkpoint from epoch {best_epoch}, bal_acc={best_val_metric[0]:.4f})."
        )
        stop_epoch = epoch
        break

bio_model.load_state_dict(best_state)
bio_model.eval()

with torch.no_grad():
    final_logits_t = bio_model(Xt, At)
    final_probs = F.softmax(final_logits_t, dim=1).cpu().numpy()

threshold_rows = []
for threshold in np.arange(0.05, 0.81, 0.01):
    threshold_rows.append(summarize_operating_point(y[val_idx_np], final_probs[val_idx_np, 1], threshold))

threshold_df = pd.DataFrame(threshold_rows)
default_row = threshold_df.sort_values(
    ["balanced_acc", "accuracy", "recall", "precision", "threshold"],
    ascending=[False, False, False, False, True],
).iloc[0]

eligible_recall = threshold_df[
    threshold_df["recall"] >= recall_target
].sort_values(["threshold", "precision"], ascending=[True, False])
if len(eligible_recall) == 0:
    eligible_recall = threshold_df.sort_values(
        ["recall", "balanced_acc", "threshold"],
        ascending=[False, False, True],
    )
recall_row = eligible_recall.iloc[0]

validation_operating_points = {
    "balanced": dict(default_row),
    "recall-first": dict(recall_row),
}

def evaluate_on_test(op_name: str, threshold: float) -> dict:
    metrics = summarize_operating_point(y[test_idx_np], final_probs[test_idx_np, 1], threshold)
    metrics["name"] = op_name
    return metrics

operating_points = {
    "balanced": evaluate_on_test("balanced", float(default_row["threshold"])),
    "recall-first": evaluate_on_test("recall-first", float(recall_row["threshold"])),
}
selected_operating_point = operating_points[presentation_mode]
decision_threshold = float(selected_operating_point["threshold"])

print()
print("Best-checkpoint summary")
print("-" * 72)
print(f"Best epoch                  : {best_epoch}")
print(f"Early stop epoch            : {stop_epoch}")
print(f"Best validation loss        : {best_val_loss:.6f}")
print()
print("Validation operating points")
print("-" * 72)
for name in ["balanced", "recall-first"]:
    row = validation_operating_points[name]
    print(
        f"{name:12s} | tau={row['threshold']:.2f} | acc={row['accuracy']:.4f} | "
        f"bal_acc={row['balanced_acc']:.4f} | recall={row['recall']:.4f} | "
        f"precision={row['precision']:.4f} | spec={row['specificity']:.4f}"
    )

print()
print("Held-out operating points")
print("-" * 72)
for name in ["balanced", "recall-first"]:
    row = operating_points[name]
    print(
        f"{name:12s} | tau={row['threshold']:.2f} | acc={row['accuracy']:.4f} | "
        f"bal_acc={row['balanced_acc']:.4f} | recall={row['recall']:.4f} | "
        f"spec={row['specificity']:.4f} | precision={row['precision']:.4f} | "
        f"MCC={row['mcc']:.4f} | fn={row['fn']} | fp={row['fp']}"
    )

print()
print(f"Active presentation mode     : {presentation_mode}")
print(f"Active threshold             : {decision_threshold:.2f}")
print(f"Active held-out accuracy     : {selected_operating_point['accuracy']:.4f} ({selected_operating_point['accuracy'] * 100:.1f}%)")
print(f"Active held-out recall       : {selected_operating_point['recall']:.4f} ({selected_operating_point['recall'] * 100:.1f}%)")
print(f"Active held-out ROC AUC      : {selected_operating_point['auc']:.4f}")

════════════════════════════════════════════════════════════════════════
  ResidualClinicalGCN — Architecture and Training Configuration
════════════════════════════════════════════════════════════════════════
ResidualClinicalGCN(
  (input_proj): Linear(in_features=21, out_features=64, bias=False)
  (fc1): Linear(in_features=64, out_features=64, bias=False)
  (fc2): Linear(in_features=64, out_features=64, bias=False)
  (classifier): Sequential(
    (0): Linear(in_features=192, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=64, out_features=2, bias=True)
  )
  (dropout): Dropout(p=0.15, inplace=False)
)

Trainable parameters : 22,018
Hidden width         : 64
Dropout              : 0.15
Residual scale       : 0.35
Class weight[0]      : 0.5453  (non-pathologic)
Class weight[1]      : 6.9240  (pathologic)
Loss weighting ratio : 12.7x
Validation recall target for recall-first mode : 0.90
Presentation mode    : balanced

Training progress
------------------------------------------------------------------------
Epoch   1 | loss=0.7368 | train_acc=0.084 | val_acc=0.796 | val_bal_acc=0.867 | val_loss=0.6406
Epoch  10 | loss=0.3617 | train_acc=0.881 | val_acc=0.906 | val_bal_acc=0.949 | val_loss=0.2426
Epoch  20 | loss=0.3018 | train_acc=0.914 | val_acc=0.949 | val_bal_acc=0.951 | val_loss=0.1557
Epoch  30 | loss=0.2752 | train_acc=0.947 | val_acc=0.965 | val_bal_acc=0.981 | val_loss=0.1597
Epoch  40 | loss=0.2581 | train_acc=0.961 | val_acc=0.961 | val_bal_acc=0.979 | val_loss=0.1497
Epoch  50 | loss=0.2438 | train_acc=0.964 | val_acc=0.965 | val_bal_acc=0.981 | val_loss=0.1569
Epoch  60 | loss=0.2419 | train_acc=0.956 | val_acc=0.965 | val_bal_acc=0.981 | val_loss=0.1302
Epoch  70 | loss=0.2346 | train_acc=0.974 | val_acc=0.973 | val_bal_acc=0.985 | val_loss=0.1391
Epoch  80 | loss=0.2288 | train_acc=0.963 | val_acc=0.969 | val_bal_acc=0.983 | val_loss=0.1239
Epoch  90 | loss=0.2273 | train_acc=0.977 | val_acc=0.988 | val_bal_acc=0.994 | val_loss=0.1199
Epoch 100 | loss=0.2278 | train_acc=0.981 | val_acc=0.976 | val_bal_acc=0.987 | val_loss=0.1357
Epoch 110 | loss=0.2247 | train_acc=0.979 | val_acc=0.992 | val_bal_acc=0.996 | val_loss=0.1221
Epoch 120 | loss=0.2150 | train_acc=0.985 | val_acc=0.984 | val_bal_acc=0.991 | val_loss=0.1236
Epoch 130 | loss=0.2152 | train_acc=0.979 | val_acc=0.988 | val_bal_acc=0.994 | val_loss=0.1122
Epoch 140 | loss=0.2103 | train_acc=0.981 | val_acc=0.980 | val_bal_acc=0.989 | val_loss=0.1154
Epoch 150 | loss=0.2068 | train_acc=0.983 | val_acc=0.976 | val_bal_acc=0.987 | val_loss=0.1183
Epoch 160 | loss=0.2084 | train_acc=0.986 | val_acc=0.984 | val_bal_acc=0.991 | val_loss=0.1085
Epoch 170 | loss=0.2076 | train_acc=0.990 | val_acc=0.992 | val_bal_acc=0.996 | val_loss=0.1144
Epoch 180 | loss=0.2049 | train_acc=0.995 | val_acc=0.992 | val_bal_acc=0.996 | val_loss=0.1445

Best-checkpoint summary
------------------------------------------------------------------------
Best epoch                  : 162
Early stop epoch            : 180
Best validation loss        : 0.102432

Validation operating points
------------------------------------------------------------------------
balanced     | tau=0.68 | acc=0.9922 | bal_acc=0.9957 | recall=1.0000 | precision=0.9130 | spec=0.9915
recall-first | tau=0.05 | acc=0.0824 | bal_acc=0.5000 | recall=1.0000 | precision=0.0824 | spec=0.0000

Held-out operating points
------------------------------------------------------------------------
balanced     | tau=0.68 | acc=0.9906 | bal_acc=0.9559 | recall=0.9143 | spec=0.9974 | precision=0.9697 | MCC=0.9366 | fn=3 | fp=1
recall-first | tau=0.05 | acc=0.0822 | bal_acc=0.5000 | recall=1.0000 | spec=0.0000 | precision=0.0822 | MCC=0.0000 | fn=0 | fp=391

Active presentation mode     : balanced
Active threshold             : 0.68
Active held-out accuracy     : 0.9906 (99.1%)
Active held-out recall       : 0.9143 (91.4%)
Active held-out ROC AUC      : 0.9879

# ═══════════════════════════════════════════════════════════════════════════════
# CELL 6 — Held-out evaluation dashboard for the tuned residual CTG model
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.metrics import (
    ConfusionMatrixDisplay,
    confusion_matrix,
    precision_recall_curve,
    roc_curve,
    auc,
)
from sklearn.decomposition import PCA

bio_model.eval()
with torch.no_grad():
    final_logits_t = bio_model(Xt, At)
    final_probs = F.softmax(final_logits_t, dim=1).cpu().numpy()

selected_eval = operating_points[presentation_mode]
default_eval = operating_points["balanced"]
recall_eval = operating_points["recall-first"]
final_preds = selected_eval["pred"]
decision_threshold = float(selected_eval["threshold"])

y_te_true = y[test_idx_np]
y_te_pred = final_preds
y_te_prob_path = final_probs[test_idx_np, 1]

cm = confusion_matrix(y_te_true, y_te_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

overall_accuracy = selected_eval["accuracy"]
pathologic_recall = selected_eval["recall"]
non_pathologic_specificity = selected_eval["specificity"]
pathologic_precision = selected_eval["precision"]
negative_predictive_value = selected_eval["npv"]
balanced_accuracy = selected_eval["balanced_acc"]
mcc = selected_eval["mcc"]
roc_auc = selected_eval["auc"]
auprc = selected_eval["auprc"]

fpr, tpr, _ = roc_curve(y_te_true, y_te_prob_path)
prec_pr, rec_pr, _ = precision_recall_curve(y_te_true, y_te_prob_path)

test_discordances = int((y_te_pred != y_te_true).sum())
comparison_df = pd.DataFrame(
    [
        {
            "mode": "balanced",
            "threshold": default_eval["threshold"],
            "accuracy": default_eval["accuracy"],
            "balanced_acc": default_eval["balanced_acc"],
            "recall": default_eval["recall"],
            "specificity": default_eval["specificity"],
            "precision": default_eval["precision"],
            "mcc": default_eval["mcc"],
            "fn": default_eval["fn"],
            "fp": default_eval["fp"],
        },
        {
            "mode": "recall-first",
            "threshold": recall_eval["threshold"],
            "accuracy": recall_eval["accuracy"],
            "balanced_acc": recall_eval["balanced_acc"],
            "recall": recall_eval["recall"],
            "specificity": recall_eval["specificity"],
            "precision": recall_eval["precision"],
            "mcc": recall_eval["mcc"],
            "fn": recall_eval["fn"],
            "fp": recall_eval["fp"],
        },
    ]
)

print("═" * 72)
print("  ResidualClinicalGCN — Held-Out Test Evaluation (CTG dataset)")
print("═" * 72)
print(f"  Active mode                  : {presentation_mode}")
print(f"  Decision threshold           : {decision_threshold:.2f}  (validation-selected)")
print(f"  Accuracy                     : {overall_accuracy:.4f} ({overall_accuracy * 100:.1f}%)")
print(f"  Balanced accuracy            : {balanced_accuracy:.4f}")
print(f"  Pathologic recall            : {pathologic_recall:.4f} ({pathologic_recall * 100:.1f}%)")
print(f"  Non-pathologic specificity   : {non_pathologic_specificity:.4f} ({non_pathologic_specificity * 100:.1f}%)")
print(f"  Pathologic precision         : {pathologic_precision:.4f} ({pathologic_precision * 100:.1f}%)")
print(f"  Negative predictive value    : {negative_predictive_value:.4f} ({negative_predictive_value * 100:.1f}%)")
print(f"  ROC AUC                      : {roc_auc:.4f}")
print(f"  AUPRC                        : {auprc:.4f}")
print(f"  MCC                          : {mcc:.4f}")
print(f"  False negatives              : {fn}")
print(f"  False positives              : {fp}")
print(f"  Held-out discordances        : {test_discordances} / {len(y_te_true)}")
print()
print("Operating-point comparison")
print(comparison_df.to_string(index=False, formatters={
    "threshold": "{:.2f}".format,
    "accuracy": "{:.4f}".format,
    "balanced_acc": "{:.4f}".format,
    "recall": "{:.4f}".format,
    "specificity": "{:.4f}".format,
    "precision": "{:.4f}".format,
    "mcc": "{:.4f}".format,
}))

fig, axes = plt.subplots(2, 3, figsize=(20, 12))

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
cm_display.plot(ax=axes[0, 0], colorbar=False, cmap="Blues")
axes[0, 0].set_title(f"Confusion Matrix\n({presentation_mode}, tau = {decision_threshold:.2f})", fontsize=12, fontweight="bold")
axes[0, 0].set_xlabel(
    f"Recall={pathologic_recall:.3f} | Specificity={non_pathologic_specificity:.3f}\n"
    f"PPV={pathologic_precision:.3f} | NPV={negative_predictive_value:.3f}",
    fontsize=8.5,
)

axes[0, 1].plot(fpr, tpr, lw=2.5, color="firebrick", label=f"ResidualClinicalGCN  AUC = {roc_auc:.4f}")
axes[0, 1].plot([0, 1], [0, 1], "k--", lw=1, label="Random baseline")
axes[0, 1].fill_between(fpr, tpr, alpha=0.12, color="firebrick")
axes[0, 1].set_xlabel("False positive rate", fontsize=11)
axes[0, 1].set_ylabel("True positive rate", fontsize=11)
axes[0, 1].set_title("ROC Curve\n(pathologic treated as positive)", fontsize=12, fontweight="bold")
axes[0, 1].legend(fontsize=9)
axes[0, 1].set_aspect("equal")

axes[0, 2].plot(rec_pr, prec_pr, lw=2.5, color="darkorchid", label=f"ResidualClinicalGCN  AUPRC = {auprc:.4f}")
axes[0, 2].axhline(y_te_true.mean(), color="k", ls="--", lw=1, label=f"Prevalence baseline = {y_te_true.mean():.3f}")
axes[0, 2].fill_between(rec_pr, prec_pr, alpha=0.10, color="darkorchid")
axes[0, 2].set_xlabel("Recall", fontsize=11)
axes[0, 2].set_ylabel("Precision", fontsize=11)
axes[0, 2].set_title("Precision–Recall Curve\n(critical under imbalance)", fontsize=12, fontweight="bold")
axes[0, 2].legend(fontsize=9)
axes[0, 2].set_xlim([0, 1])
axes[0, 2].set_ylim([0, 1.05])

axes[1, 0].plot(history["epoch"], history["train_acc"], color="navy", label="Train accuracy")
axes[1, 0].plot(history["epoch"], history["val_acc"], color="darkorange", label="Validation accuracy")
axes[1, 0].plot(history["epoch"], history["val_bal_acc"], color="#0f766e", label="Validation balanced accuracy")
axes[1, 0].axvline(best_epoch, color="gray", linestyle=":", linewidth=1.2, label=f"Best epoch = {best_epoch}")
axes[1, 0].set_xlabel("Epoch")
axes[1, 0].set_ylabel("Accuracy")
axes[1, 0].set_ylim(0.0, 1.05)
axes[1, 0].set_title("Training dynamics", fontsize=12, fontweight="bold")
axes[1, 0].grid(True, alpha=0.3)
loss_axis = axes[1, 0].twinx()
loss_axis.plot(history["epoch"], history["val_loss"], color="forestgreen", linestyle="--", label="Validation loss")
loss_axis.set_ylabel("Validation loss")
train_lines, train_labels = axes[1, 0].get_legend_handles_labels()
loss_lines, loss_labels = loss_axis.get_legend_handles_labels()
axes[1, 0].legend(train_lines + loss_lines, train_labels + loss_labels, loc="best", fontsize=9)

pca = PCA(n_components=2, random_state=SEED)
Z = pca.fit_transform(X_std)
sc_true = axes[1, 1].scatter(Z[:, 0], Z[:, 1], c=y, cmap="coolwarm", s=10, alpha=0.65)
axes[1, 1].set_title("PCA Projection — True Labels", fontsize=12, fontweight="bold")
axes[1, 1].set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0] * 100:.1f}% var.)")
axes[1, 1].set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1] * 100:.1f}% var.)")
plt.colorbar(sc_true, ax=axes[1, 1], ticks=[0, 1], label="0=non-path | 1=pathologic")

discord_mask = np.zeros(len(y), dtype=bool)
discord_mask[test_idx_np] = y_te_pred != y[test_idx_np]
sc_pred = axes[1, 2].scatter(Z[:, 0], Z[:, 1], c=np.where(final_probs[:, 1] >= decision_threshold, 1, 0), cmap="coolwarm", s=10, alpha=0.55)
axes[1, 2].scatter(Z[test_idx_np, 0], Z[test_idx_np, 1], facecolors="none", edgecolors="black", s=35, linewidths=0.5, label="Test exams", zorder=4)
axes[1, 2].scatter(Z[discord_mask, 0], Z[discord_mask, 1], c="gold", s=55, marker="x", linewidths=1.8, label=f"Discordances ({discord_mask.sum()})", zorder=5)
axes[1, 2].set_title("PCA Projection — Predictions", fontsize=12, fontweight="bold")
axes[1, 2].set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0] * 100:.1f}% var.)")
axes[1, 2].set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1] * 100:.1f}% var.)")
axes[1, 2].legend(fontsize=9)
plt.colorbar(sc_pred, ax=axes[1, 2], ticks=[0, 1], label="Predicted label")

plt.suptitle("ResidualClinicalGCN — Evaluation Dashboard (UCI CTG held-out test set)", fontsize=15, fontweight="bold", y=1.01)
plt.tight_layout()
save_notebook_figure(fig, "combined_ctg_evaluation_dashboard.png")
plt.show()

print("\nSummary metrics (active held-out operating point)")
print(f"  Mode                        : {presentation_mode}")
print(f"  Accuracy                    : {overall_accuracy:.4f} ({overall_accuracy * 100:.1f}%)")
print(f"  Balanced accuracy           : {balanced_accuracy:.4f}")
print(f"  Pathologic recall           : {pathologic_recall:.4f} ({pathologic_recall * 100:.1f}%)")
print(f"  Non-pathologic specificity  : {non_pathologic_specificity:.4f} ({non_pathologic_specificity * 100:.1f}%)")
print(f"  Pathologic precision        : {pathologic_precision:.4f} ({pathologic_precision * 100:.1f}%)")
print(f"  Negative predictive value   : {negative_predictive_value:.4f} ({negative_predictive_value * 100:.1f}%)")
print(f"  ROC AUC                     : {roc_auc:.4f}")
print(f"  AUPRC                       : {auprc:.4f}")
print(f"  MCC                         : {mcc:.4f}")
print(f"  False negatives             : {fn}")
print(f"  False positives             : {fp}")

════════════════════════════════════════════════════════════════════════
  ResidualClinicalGCN — Held-Out Test Evaluation (CTG dataset)
════════════════════════════════════════════════════════════════════════
  Active mode                  : balanced
  Decision threshold           : 0.68  (validation-selected)
  Accuracy                     : 0.9906 (99.1%)
  Balanced accuracy            : 0.9559
  Pathologic recall            : 0.9143 (91.4%)
  Non-pathologic specificity   : 0.9974 (99.7%)
  Pathologic precision         : 0.9697 (97.0%)
  Negative predictive value    : 0.9924 (99.2%)
  ROC AUC                      : 0.9879
  AUPRC                        : 0.9576
  MCC                          : 0.9366
  False negatives              : 3
  False positives              : 1
  Held-out discordances        : 4 / 426

Operating-point comparison
        mode threshold accuracy balanced_acc recall specificity precision    mcc  fn  fp
    balanced      0.68   0.9906       0.9559 0.9143      0.9974    0.9697 0.9366   3   1
recall-first      0.05   0.0822       0.5000 1.0000      0.0000    0.0822 0.0000   0 391
Saved figure assets -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/notebooks/figures/combined_ctg_evaluation_dashboard.png
                     -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/website/notebooks_html/figures/combined_ctg_evaluation_dashboard.png

Summary metrics (active held-out operating point)
  Mode                        : balanced
  Accuracy                    : 0.9906 (99.1%)
  Balanced accuracy           : 0.9559
  Pathologic recall           : 0.9143 (91.4%)
  Non-pathologic specificity  : 0.9974 (99.7%)
  Pathologic precision        : 0.9697 (97.0%)
  Negative predictive value   : 0.9924 (99.2%)
  ROC AUC                     : 0.9879
  AUPRC                       : 0.9576
  MCC                         : 0.9366
  False negatives             : 3
  False positives             : 1

# ═══════════════════════════════════════════════════════════════════════════════
# CELL 7 — Fixed-split robustness study across multiple training seeds
# ═══════════════════════════════════════════════════════════════════════════════

from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    matthews_corrcoef,
    roc_auc_score,
    average_precision_score,
    recall_score,
    precision_score,
)

ROBUST_SPLIT_SEED = 42
ROBUST_MODEL_SEEDS = [7, 21, 42, 84, 126]
ROBUST_THRESHOLD = float(decision_threshold) if "decision_threshold" in globals() else 0.78


def compute_binary_metrics_local(y_true, y_pred, prob_pos):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    specificity = tn / max(tn + fp, 1)
    npv = tn / max(tn + fn, 1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "balanced_acc": balanced_accuracy_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "specificity": specificity,
        "npv": npv,
        "mcc": matthews_corrcoef(y_true, y_pred),
        "auc": roc_auc_score(y_true, prob_pos),
        "auprc": average_precision_score(y_true, prob_pos),
        "tn": int(tn),
        "fp": int(fp),
        "fn": int(fn),
        "tp": int(tp),
    }


def build_normalized_knn_graph(features, k_neighbors=15):
    sparse_graph = kneighbors_graph(
        features,
        n_neighbors=k_neighbors,
        mode="connectivity",
        include_self=False,
        metric="euclidean",
    )
    adjacency = sparse_graph.toarray().astype(np.float32)
    adjacency = np.maximum(adjacency, adjacency.T)
    np.fill_diagonal(adjacency, 1.0)
    degree = adjacency.sum(axis=1)
    degree_inv_sqrt = 1.0 / np.sqrt(np.clip(degree, 1e-8, None))
    normalized = degree_inv_sqrt[:, None] * adjacency * degree_inv_sqrt[None, :]
    graph_stats = {
        "nodes": int(adjacency.shape[0]),
        "edges": int((np.triu(adjacency, k=1) > 0).sum()),
        "density": float((np.triu(adjacency, k=1) > 0).sum() / max(adjacency.shape[0] * (adjacency.shape[0] - 1) / 2, 1)),
    }
    return normalized.astype(np.float32), graph_stats


def train_integrated_clinical_gcn_once(model_seed):
    random.seed(model_seed)
    np.random.seed(model_seed)
    torch.manual_seed(model_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(model_seed)

    all_indices = np.arange(len(y))
    train_pool_idx, test_idx = train_test_split(
        all_indices,
        test_size=0.20,
        stratify=y,
        random_state=ROBUST_SPLIT_SEED,
    )
    train_idx, val_idx = train_test_split(
        train_pool_idx,
        test_size=0.15,
        stratify=y[train_pool_idx],
        random_state=ROBUST_SPLIT_SEED,
    )

    scaler = StandardScaler()
    scaler.fit(X_raw[train_idx])
    X_std_local = scaler.transform(X_raw).astype(np.float32)

    A_norm_local, graph_stats = build_normalized_knn_graph(X_std_local, k_neighbors=15)
    A_t = torch.tensor(A_norm_local, dtype=torch.float32, device=device)
    X_t = torch.tensor(X_std_local, dtype=torch.float32, device=device)
    y_t = torch.tensor(y, dtype=torch.long, device=device)

    train_mask = torch.zeros(len(y), dtype=torch.bool, device=device)
    val_mask = torch.zeros(len(y), dtype=torch.bool, device=device)
    train_mask[train_idx] = True
    val_mask[val_idx] = True

    train_counts = np.bincount(y[train_idx], minlength=2).astype(np.float32)
    class_weights_local = train_counts.sum() / (len(train_counts) * np.maximum(train_counts, 1))
    class_weights_local[1] *= 1.15
    criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights_local, dtype=torch.float32, device=device))

    model = ResidualClinicalGCN(
        in_features=X_std_local.shape[1],
        hidden_dim=model_hidden_dim,
        num_classes=2,
        dropout=model_dropout,
        residual_scale=model_residual_scale,
    ).to(device)
    optimizer_local = torch.optim.AdamW(model.parameters(), lr=3e-3, weight_decay=5e-4)

    best_state_local = None
    best_val_metric_local = None
    wait = 0
    local_threshold_grid = np.linspace(0.20, 0.80, 61)
    for _epoch in range(1, 181):
        model.train()
        logits = model(X_t, A_t)
        loss = criterion(logits[train_mask], y_t[train_mask])
        optimizer_local.zero_grad()
        loss.backward()
        optimizer_local.step()

        model.eval()
        with torch.no_grad():
            val_logits = model(X_t, A_t)
            val_loss = criterion(val_logits[val_mask], y_t[val_mask]).item()
            val_probs_local = F.softmax(val_logits, dim=1)[:, 1].detach().cpu().numpy()

        best_epoch_metric_local = None
        for thr in local_threshold_grid:
            val_pred_local = (val_probs_local[val_idx] >= thr).astype(np.int64)
            val_metrics_local = compute_binary_metrics_local(y[val_idx], val_pred_local, val_probs_local[val_idx])
            candidate_metric_local = (
                float(val_metrics_local["balanced_acc"]),
                float(val_metrics_local["accuracy"]),
                float(val_metrics_local["recall"]),
                float(val_metrics_local["precision"]),
                -abs(float(thr) - 0.5),
            )
            if best_epoch_metric_local is None or candidate_metric_local > best_epoch_metric_local:
                best_epoch_metric_local = candidate_metric_local

        checkpoint_metric_local = best_epoch_metric_local + (-float(val_loss),)
        if best_val_metric_local is None or checkpoint_metric_local > best_val_metric_local:
            best_val_metric_local = checkpoint_metric_local
            best_state_local = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            wait = 0
        else:
            wait += 1
        if wait >= 30:
            break

    model.load_state_dict(best_state_local)
    model.eval()
    with torch.no_grad():
        final_probs_local = F.softmax(model(X_t, A_t), dim=1).cpu().numpy()

    prob_val_local = final_probs_local[val_idx, 1]
    threshold_rows_local = []
    for thr in local_threshold_grid:
        val_pred_local = (prob_val_local >= thr).astype(np.int64)
        val_metrics_local = compute_binary_metrics_local(y[val_idx], val_pred_local, prob_val_local)
        threshold_rows_local.append(
            {
                "threshold": float(thr),
                "accuracy": float(val_metrics_local["accuracy"]),
                "balanced_acc": float(val_metrics_local["balanced_acc"]),
                "recall": float(val_metrics_local["recall"]),
                "specificity": float(val_metrics_local["specificity"]),
                "precision": float(val_metrics_local["precision"]),
                "mcc": float(val_metrics_local["mcc"]),
            }
        )
    threshold_df_local = pd.DataFrame(threshold_rows_local)
    selected_row_local = threshold_df_local.sort_values(
        ["balanced_acc", "accuracy", "recall", "precision", "threshold"],
        ascending=[False, False, False, False, False],
    ).iloc[0]
    threshold_local = float(selected_row_local["threshold"])

    y_true = y[test_idx]
    y_pred = (final_probs_local[test_idx, 1] >= threshold_local).astype(np.int64)
    test_metrics_local = compute_binary_metrics_local(y_true, y_pred, final_probs_local[test_idx, 1])

    return {
        "seed": model_seed,
        "threshold": threshold_local,
        "accuracy": float(test_metrics_local["accuracy"]),
        "balanced_acc": float(test_metrics_local["balanced_acc"]),
        "mcc": float(test_metrics_local["mcc"]),
        "auc": float(test_metrics_local["auc"]),
        "auprc": float(test_metrics_local["auprc"]),
        "recall": float(test_metrics_local["recall"]),
        "precision": float(test_metrics_local["precision"]),
        "specificity": float(test_metrics_local["specificity"]),
        "false_negatives": int(test_metrics_local["fn"]),
        "false_positives": int(test_metrics_local["fp"]),
        "graph_density": graph_stats["density"],
    }


robustness_df = pd.DataFrame([train_integrated_clinical_gcn_once(seed) for seed in ROBUST_MODEL_SEEDS])

summary_rows = []
for metric in ["accuracy", "balanced_acc", "auc", "auprc", "recall", "precision", "specificity", "mcc"]:
    summary_rows.append(
        {
            "metric": metric,
            "mean": float(robustness_df[metric].mean()),
            "std": float(robustness_df[metric].std(ddof=0)),
        }
    )
robustness_summary = pd.DataFrame(summary_rows)

fig, axes = plt.subplots(1, 2, figsize=(12, 4.5))
axes[0].plot(robustness_df["seed"], robustness_df["accuracy"], marker="o", color="#0f766e", label="Accuracy")
axes[0].plot(robustness_df["seed"], robustness_df["balanced_acc"], marker="s", color="#1d4ed8", label="Balanced accuracy")
axes[0].plot(robustness_df["seed"], robustness_df["recall"], marker="^", color="#b91c1c", label="Recall")
axes[0].set_xlabel("Training seed")
axes[0].set_ylabel("Score")
axes[0].set_ylim(0.0, 1.02)
axes[0].set_title("Fixed-split robustness across training seeds")
axes[0].grid(alpha=0.25)
axes[0].legend(frameon=False)

x_positions = np.arange(len(robustness_summary))
axes[1].bar(x_positions, robustness_summary["mean"], color="#cbd5e1", width=0.65)
axes[1].errorbar(x_positions, robustness_summary["mean"], yerr=robustness_summary["std"], fmt="none", ecolor="#0f766e", elinewidth=2, capsize=4)
axes[1].set_xticks(x_positions)
axes[1].set_xticklabels([metric.replace("_", " ") for metric in robustness_summary["metric"]], rotation=35, ha="right")
axes[1].set_ylim(0.0, 1.02)
axes[1].set_title("Mean ± std across fixed-split runs")
axes[1].grid(alpha=0.25, axis="y")

plt.tight_layout()
save_notebook_figure(fig, "combined_ctg_robustness.png")
plt.show()

display(robustness_df.round(4))
display(robustness_summary.round(4))
print("Fixed-split robustness summary")
print(f"  Threshold used         : validation-selected per seed ({robustness_df['threshold'].mean():.2f} ± {robustness_df['threshold'].std(ddof=0):.2f})")
print(f"  Accuracy               : {robustness_df['accuracy'].mean():.4f} ± {robustness_df['accuracy'].std(ddof=0):.4f}")
print(f"  Balanced accuracy      : {robustness_df['balanced_acc'].mean():.4f} ± {robustness_df['balanced_acc'].std(ddof=0):.4f}")
print(f"  Recall                 : {robustness_df['recall'].mean():.4f} ± {robustness_df['recall'].std(ddof=0):.4f}")
print(f"  ROC AUC                : {robustness_df['auc'].mean():.4f} ± {robustness_df['auc'].std(ddof=0):.4f}")
print(f"  MCC                    : {robustness_df['mcc'].mean():.4f} ± {robustness_df['mcc'].std(ddof=0):.4f}")

Saved figure assets -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/notebooks/figures/combined_ctg_robustness.png
                     -> /Users/mohuyn/Library/CloudStorage/OneDrive-SAS/Documents/GitHub/Hybrid-Quantum-Graph-AI-QAOA-GNN-Biomedical-Optimization/website/notebooks_html/figures/combined_ctg_robustness.png

Fixed-split robustness summary
  Threshold used         : validation-selected per seed (0.51 ± 0.08)
  Accuracy               : 0.9568 ± 0.0074
  Balanced accuracy      : 0.9114 ± 0.0085
  Recall                 : 0.8571 ± 0.0256
  ROC AUC                : 0.9756 ± 0.0042
  MCC                    : 0.7497 ± 0.0273

exported_html = export_notebook_html("quantum_ai_bio_combined.html")
print(f"Exported HTML: {exported_html}")
print(f"Notebook figure directory: {notebook_figure_dir}")
print(f"Website figure directory : {html_figure_dir}")

Branch	Input graph	Learned parameterization	Downstream objective
Transcriptomic QAOA (primary)	gene co-expression graph	depth-2 angle vector $(\gamma_1, \gamma_2, \beta_1, \beta_2)$	expected maximum cut (MaxCut) value
Cardiotocography (CTG) screening	patient-similarity graph	node-level pathologic-risk scores	thresholded screening behavior

Metric	Value
Transcriptomic representative classical ratio	0.8976
Transcriptomic representative adapted ratio	0.8975
Transcriptomic held-out classical ratio	0.8686
Transcriptomic held-out adapted ratio	0.8682
Transcriptomic quality retention	99.95%
Transcriptomic median speedup	2,640x
CTG graph operating-point accuracy	98.8%
CTG graph operating-point balanced accuracy	0.942

Method	Mean Ratio	Δ vs. This Work	Notes
Zero angles (no optimization)	0.7224	−0.1458	Trivial baseline — no tuning
Prior-style transfer / random-init learned baseline	0.8208	−0.0474 (−5.77%)	Warm-start without graph conditioning
Direct classical search (Nelder–Mead, full budget)	0.8686	+0.0004	Full optimization, full latency
Random search (best of 256 evaluations)	0.8954	+0.0272	Expensive random sampling
Goemans–Williamson SDP classical guarantee	0.8780	+0.0098	Best polynomial-time classical bound
⭐ This work: graph-conditioned GNN, depth-2	0.8682	—	Single forward pass, 0.256 ms

Method	Accuracy	Balanced Acc.	ROC AUC	TP / 35	FP
Logistic Regression	94.1%	0.916	0.984	31	21
Random Forest	96.9%	0.905	0.994	29	7
MLP (tabular neural net)	98.4%	0.926	0.971	30	2
LightGBM	98.6%	0.927	0.993	30	1
XGBoost	98.8%	0.955	0.992	32	2
Calibrated LightGBM (strongest tabular)	99.1%	0.956	0.991	32	1
AdaptiveBioGCN (graph, this work)	96.7% ± 0.97%	—	—	—	—
⭐ ResidualClinicalGCN (graph, this work)	98.8%	0.942	0.978	31	1

Setting	Input graph	Parameterization	Downstream objective	Main metrics
Transcriptomic Quantum Approximate Optimization Algorithm (QAOA)	gene co-expression graph	depth-2 angle vector $(\gamma_1, \gamma_2, \beta_1, \beta_2)$	expected maximum cut (MaxCut) value	approximation ratio, runtime, regret
Cardiotocography (CTG) screening	patient-similarity graph	node-level pathologic-risk scores	thresholded screening behavior	accuracy, balanced accuracy, receiver operating characteristic area under the curve (ROC AUC), operating point

	graph_id	classical_ratio	adapted_ratio	retention_vs_classical	classical_seconds	adapted_seconds	speedup
0	42	0.8911	0.8888	0.9974	25.9195	0.0001	396474.4628
1	43	0.8531	0.8512	0.9977	24.2286	0.0001	435569.8022
2	44	0.8479	0.8471	0.9991	27.2057	0.0000	673141.8411
3	45	0.8454	0.8424	0.9965	21.2857	0.0000	500345.7195
4	46	0.8492	0.8414	0.9908	22.8257	0.0000	572430.4628
5	47	0.8899	0.8888	0.9987	24.4366	0.0000	610916.4020

Attribute	Value
Source	UCI ML Repository, id=193
Exams (nodes)	2,126
Features per exam	21 physiologic measurements
Original label (NSP)	1=Normal, 2=Suspect, 3=Pathologic
Binary target	pathologic (NSP=3) vs non-pathologic (NSP∈{1,2})
Pathologic prevalence	about 8-10%, depending on split

Signal	Why it matters
Best checkpoint epoch	shows the residual model was selected before overfitting
Balanced operating point	gives the strongest headline accuracy while preserving good minority-class behavior
Recall-first operating point	explicitly minimizes missed pathologic cases at the cost of some extra false positives
Validation-only threshold selection	prevents contamination of the final test estimate
Held-out dual-mode report	makes the deployment tradeoff explicit instead of hiding it

	seed	threshold	accuracy	balanced_acc	mcc	auc	auprc	recall	precision	specificity	false_negatives	false_positives	graph_density
0	7	0.42	0.9484	0.9198	0.7227	0.9705	0.8739	0.8857	0.6327	0.9540	4	18	0.0098
1	21	0.52	0.9601	0.9002	0.7536	0.9782	0.8862	0.8286	0.7250	0.9719	6	11	0.0098
2	42	0.66	0.9695	0.9053	0.8004	0.9790	0.9007	0.8286	0.8056	0.9821	6	7	0.0098
3	84	0.48	0.9531	0.9224	0.7405	0.9798	0.8766	0.8857	0.6596	0.9591	4	16	0.0098
4	126	0.48	0.9531	0.9094	0.7315	0.9706	0.8614	0.8571	0.6667	0.9616	5	15	0.0098

	metric	mean	std
0	accuracy	0.9568	0.0074
1	balanced_acc	0.9114	0.0085
2	auc	0.9756	0.0042
3	auprc	0.8797	0.0131
4	recall	0.8571	0.0256
5	precision	0.6979	0.0617
6	specificity	0.9657	0.0100
7	mcc	0.7497	0.0273

Graph Neural Networks for the Quantum Approximate Optimization Algorithm (QAOA)¶

Research Overview¶

Scope¶

System View¶

Key Results¶

Navigation¶

Statistical Comparison: This Work vs. All Prior Methods¶

Branch 1 — QAOA Parameter Prediction (held-out approximation ratio, higher = better)¶

Branch 2 — CTG Clinical Risk Scoring (held-out, n = 426 exams, 35 pathologic)¶

Joint Takeaway¶

Comparative View¶

Signal Gallery¶

Transcriptomic Adaptation and Benchmark¶

Integrated Biomedical Evaluation¶

Fixed-Split Robustness¶

0. Theoretical Foundation ¶

0.1 Quantum Approximate Optimization Algorithm (QAOA): Formalism and Guarantees¶

0.2 Graph Convolutional Network (GCN): Message Passing and Spectral Motivation¶

0.3 Hybrid Loop: Classical Orchestration + Quantum-Style Evaluation¶

0.4 Transductive Graph Classification (Biomedical Branch)¶

0.5 Execution Environment¶

Step 0 — Environment, Reproducibility, and Audit¶

Part 2 — Real Transcriptomic Optimization Setting ¶

1.1 Why the Optimization Branch Was Rebuilt¶

1.2 Why This Is a Stronger Optimization Setting¶

1.3 Optimization Protocol¶

1.4 Why the Small-Graph Regime Is Still Legitimate¶

2.1 Adaptation Objective¶

2.2 Why the Learned Model Matters¶

2.3 Adaptation and Representative Evaluation¶

2.4 What Counts as Success¶

Part 3 — Optimization Geometry and Held-Out Transcriptomic Evaluation ¶

3.1 Why the Optimization Branch Needs Both Figures and a Benchmark¶

3.2 What the Next Code Cell Produces¶

3.3 What to Look For¶

3.1 How to Read the Benchmark¶

3.2 Joint Benchmark Evidence¶

Part 4 — Biomedical Graph Learning: Transductive Clinical Risk Detection ¶

4.1 Dataset: UCI Cardiotocography Cohort¶

4.2 Protocol¶

4.3 Why the Model Was Upgraded¶

4.4 Why Transductive GCN Is Appropriate¶

4.5 What Counts as Success¶

Part 6 — Multi-Seed Robustness of the Integrated Clinical Graph Model ¶

Robustness protocol¶

Part 5 — Reading the Evaluation Dashboard ¶

Why six panels instead of one number¶

Top row — discrimination and operating behavior¶

Bottom row — training stability and cohort geometry¶

Takeaway¶

HTML Export¶