from pathlib import Path
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import display

NOTEBOOK_DIR = Path.cwd().resolve()
ROOT = NOTEBOOK_DIR if (NOTEBOOK_DIR / 'data').exists() else NOTEBOOK_DIR.parent
DATA_DIR = ROOT / 'data'
OUTPUT_DIR = ROOT / 'outputs'

python_executable = Path(sys.executable)
if 'qaoa' not in str(python_executable).lower():
    raise RuntimeError(
        f'This study must be executed from the qaoa conda environment. Active interpreter: {python_executable}'
    )

plt.rcParams.update({
    'figure.figsize': (12, 6),
    'axes.grid': True,
    'grid.alpha': 0.25,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'font.size': 11,
    'axes.titlesize': 13,
    'axes.labelsize': 11,
})

PALETTE = {
    'navy': '#0f172a',
    'blue': '#2563eb',
    'teal': '#0f766e',
    'gold': '#b45309',
    'red': '#b91c1c',
    'slate': '#475569',
}

qho_benchmark = pd.read_csv(OUTPUT_DIR / 'qho_full_benchmark.csv')
qho_summary = pd.read_csv(OUTPUT_DIR / 'qho_ground_state_interview_summary.csv').iloc[0]
qho_loss = pd.read_csv(OUTPUT_DIR / 'harmonic_oscillator_loss.csv')
qho_activation = pd.read_csv(OUTPUT_DIR / 'qho_activation_ablation.csv')
qho_depth = pd.read_csv(OUTPUT_DIR / 'qho_depth_ablation.csv')
qho_collocation = pd.read_csv(OUTPUT_DIR / 'qho_collocation_ablation.csv')
molecular_anchors = pd.read_csv(DATA_DIR / 'molecular_vibrational_anchors.csv')

def load_png(name: str):
    return mpimg.imread(OUTPUT_DIR / name)

display(pd.DataFrame({
    'artifact': [
        'Ground-state rel-L2',
        'Ground-state energy error',
        'Squared overlap',
        'Active interpreter',
    ],
    'value': [
        f"{qho_summary['rel_l2']:.6e}",
        f"{qho_summary['energy_abs_error']:.6e}",
        f"{qho_summary['overlap_sq']:.8f}",
        str(python_executable),
    ],
}))

feature_matrix = pd.DataFrame(
    {
        'Vanilla PINN': [0, 0, 0, 0, 0],
        'Proposed formulation': [1, 1, 1, 1, 1],
    },
    index=[
        'Hard Gaussian envelope',
        'Joint eigenvalue learning',
        'Parity regularization',
        'Rayleigh consistency',
        'Second-stage refinement',
    ],
)

scorecard = pd.DataFrame({
    'metric': ['Relative L2 error', 'Linf error', 'Energy abs. error', 'Rayleigh gap'],
    'value': [
        qho_summary['rel_l2'],
        qho_summary['linf'],
        qho_summary['energy_abs_error'],
        qho_summary['rayleigh_gap'],
    ],
})

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].imshow(feature_matrix.values, cmap='Blues', vmin=0, vmax=1)
axes[0].set_xticks(range(feature_matrix.shape[1]), feature_matrix.columns, rotation=15)
axes[0].set_yticks(range(feature_matrix.shape[0]), feature_matrix.index)
axes[0].set_title('Model Components Beyond a Vanilla PINN')
for row_index in range(feature_matrix.shape[0]):
    for col_index in range(feature_matrix.shape[1]):
        axes[0].text(
            col_index,
            row_index,
            'Yes' if feature_matrix.iloc[row_index, col_index] else 'No',
            ha='center',
            va='center',
            color='white' if feature_matrix.iloc[row_index, col_index] else PALETTE['navy'],
            fontsize=10,
            fontweight='bold',
        )

axes[1].barh(scorecard['metric'], scorecard['value'], color=[PALETTE['blue'], PALETTE['teal'], PALETTE['gold'], PALETTE['red']])
axes[1].set_xscale('log')
axes[1].set_title('Ground-State Error Metrics (Lower Is Better)')
axes[1].set_xlabel('Metric value')
axes[1].text(
    0.98,
    0.02,
    f"Squared overlap = {qho_summary['overlap_sq']:.8f}\nLearned energy = {qho_summary['energy_pred']:.8f}",
    transform=axes[1].transAxes,
    ha='right',
    va='bottom',
    bbox={'facecolor': 'white', 'edgecolor': '#cbd5e1'},
)
plt.tight_layout()

display(scorecard.round(8))

# ════════════════════════════════════════════════════════════════════════════
# ACCURACY-FIRST CONFIGURATION — edit this cell only to control the study configuration
# ════════════════════════════════════════════════════════════════════════════

RUN_PROFILE = 'interview'   # 'demo' | 'interview'

PROFILE_SCALE = {
    'demo': 0.18,
    'interview': 1.0,
}

_SCALE = PROFILE_SCALE[RUN_PROFILE]


def T(n_epochs: int) -> int:
    """Scale auxiliary experiment lengths while keeping the main result accuracy-first."""
    return max(100, int(n_epochs * _SCALE))


# Ground-state optimization budget.
GROUND_STATE_EPOCHS_ADAM = 1500 if RUN_PROFILE == 'demo' else 7000
GROUND_STATE_LBFGS_STEPS = 0 if RUN_PROFILE == 'demo' else 350

# Auxiliary experiment budgets.
EXCITED_STATE_EPOCHS = 900 if RUN_PROFILE == 'demo' else 2400
ABLATION_EPOCHS = 300 if RUN_PROFILE == 'demo' else 1200

print('=' * 72)
print(f'  Run profile              : {RUN_PROFILE}')
print(f'  Ground-state Adam steps  : {GROUND_STATE_EPOCHS_ADAM}')
print(f'  Ground-state L-BFGS      : {GROUND_STATE_LBFGS_STEPS}')
print(f'  Excited-state epochs     : {EXCITED_STATE_EPOCHS}')
print(f'  Ablation epochs (scaled) : {ABLATION_EPOCHS}')
print('=' * 72)
print()
print('Study philosophy: the main QHO result is accuracy-first, not demo-first.')
print('The later ablations remain scaled so the study stays runnable end-to-end.')

========================================================================
  Run profile              : interview
  Ground-state Adam steps  : 7000
  Ground-state L-BFGS      : 350
  Excited-state epochs     : 2400
  Ablation epochs (scaled) : 1200
========================================================================

Notebook philosophy: the main QHO result is now accuracy-first, not demo-first.
The later ablations remain scaled so the notebook is still runnable end-to-end.

# ── Accuracy-First Hyperparameters & Model Construction ───────────────────
#
# This cell upgrades the original tutorial baseline in four ways:
#   1. GELU activation becomes the default for smoother higher-order derivatives.
#   2. Sobol collocation replaces purely random sampling for lower-discrepancy coverage.
#   3. Even-parity regularization exploits the exact symmetry of the QHO ground state.
#   4. Adam warm start is followed by L-BFGS refinement for higher final accuracy.

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Running on: {device}')

X_MIN, X_MAX = -7.0, 7.0
N_COLLOCATION = 768 if RUN_PROFILE == 'demo' else 3072
N_BOUNDARY = 2
N_NORM_QUAD = 801 if RUN_PROFILE == 'demo' else 2401
N_SYM_SAMPLES = 161 if RUN_PROFILE == 'demo' else 401
N_EPOCHS = GROUND_STATE_EPOCHS_ADAM
LR_ADAM = 8e-4 if RUN_PROFILE == 'demo' else 5e-4
LBFGS_LR = 0.6
HIDDEN_DIM = 64 if RUN_PROFILE == 'demo' else 96
N_LAYERS = 4 if RUN_PROFILE == 'demo' else 5
ACTIVATION = 'gelu'

LAM_PDE = 1.0
LAM_BC = 5.0
LAM_NORM = 25.0
LAM_SYM = 10.0
LAM_RAYLEIGH = 2.0

print('\nHyperparameters:')
print(f'  Domain                  : [{X_MIN}, {X_MAX}]')
print(f'  Collocation             : {N_COLLOCATION} Sobol points / epoch')
print(f'  Adam epochs             : {N_EPOCHS}')
print(f'  L-BFGS refinement       : {GROUND_STATE_LBFGS_STEPS}')
print(f'  Architecture            : {N_LAYERS} hidden layers x {HIDDEN_DIM} units ({ACTIVATION})')
print(f'  Loss weights            : λ_pde={LAM_PDE}  λ_bc={LAM_BC}  λ_norm={LAM_NORM}  λ_sym={LAM_SYM}  λ_ray={LAM_RAYLEIGH}')


class GaussianEnvelopePINN(nn.Module):
    """PINN with hard Gaussian envelope and configurable smooth activation."""

    def __init__(self, hidden_dim=64, n_layers=4, activation='gelu', x_scale=6.0):
        super().__init__()
        activations = {
            'tanh': nn.Tanh,
            'gelu': nn.GELU,
            'silu': nn.SiLU,
        }
        act_cls = activations.get(activation, nn.GELU)
        layers = []
        dims = [1] + [hidden_dim] * n_layers + [1]
        for index in range(len(dims) - 1):
            layers.append(nn.Linear(dims[index], dims[index + 1]))
            if index < len(dims) - 2:
                layers.append(act_cls())
        self.net = nn.Sequential(*layers)
        self.x_scale = float(x_scale)
        for module in self.net.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_normal_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        x_scaled = x / self.x_scale
        envelope = torch.exp(-x**2 / 4.0)
        return self.net(x_scaled) * envelope

    def count_parameters(self):
        return sum(param.numel() for param in self.parameters() if param.requires_grad)


def hamiltonian_terms(model, x):
    """Return ψ, ψ', ψ'', and Ĥψ for the harmonic oscillator Hamiltonian."""
    psi = model(x)
    dpsi_dx = torch.autograd.grad(
        psi,
        x,
        grad_outputs=torch.ones_like(psi),
        create_graph=True,
    )[0]
    dpsi_dxx = torch.autograd.grad(
        dpsi_dx,
        x,
        grad_outputs=torch.ones_like(dpsi_dx),
        create_graph=True,
    )[0]
    potential = 0.5 * x**2
    h_psi = -0.5 * dpsi_dxx + potential * psi
    return psi, dpsi_dx, dpsi_dxx, h_psi


sobol_engine = torch.quasirandom.SobolEngine(dimension=1, scramble=True, seed=42)


def sample_collocation_points(n_points: int) -> torch.Tensor:
    """Low-discrepancy sampling with a center-focused refinement band."""
    n_center = n_points // 4
    n_global = n_points - n_center
    global_points = X_MIN + (X_MAX - X_MIN) * sobol_engine.draw(n_global)
    center_points = 2.5 * (2.0 * sobol_engine.draw(n_center) - 1.0)
    x_col = torch.cat([global_points, center_points], dim=0).to(device)
    return x_col.requires_grad_(True)


x_quad = torch.linspace(X_MIN, X_MAX, N_NORM_QUAD, device=device).unsqueeze(1)
x_sym = torch.linspace(0.0, X_MAX, N_SYM_SAMPLES, device=device).unsqueeze(1)
bc_x = torch.tensor([[X_MIN], [X_MAX]], dtype=torch.float32, device=device).requires_grad_(True)
x_zero = torch.zeros((1, 1), dtype=torch.float32, device=device).requires_grad_(True)


def compute_losses(model, energy, x_col):
    psi_col, _, _, h_col = hamiltonian_terms(model, x_col)
    pde_residual = h_col - energy * psi_col
    loss_pde = (pde_residual**2).mean()

    psi_bc = model(bc_x)
    loss_bc = (psi_bc**2).mean()

    psi_quad, _, _, h_quad = hamiltonian_terms(model, x_quad.requires_grad_(True))
    norm_val = torch.trapezoid((psi_quad[:, 0] ** 2), x_quad[:, 0])
    loss_norm = (norm_val - 1.0) ** 2

    rayleigh_num = torch.trapezoid((psi_quad[:, 0] * h_quad[:, 0]), x_quad[:, 0])
    rayleigh_den = torch.trapezoid((psi_quad[:, 0] ** 2), x_quad[:, 0]).clamp_min(1e-8)
    rayleigh_energy = rayleigh_num / rayleigh_den
    loss_rayleigh = (energy.squeeze() - rayleigh_energy.detach()) ** 2

    psi_pos = model(x_sym)
    psi_neg = model(-x_sym)
    dpsi0 = torch.autograd.grad(model(x_zero), x_zero, grad_outputs=torch.ones((1, 1), device=device), create_graph=True)[0]
    loss_sym = ((psi_pos - psi_neg) ** 2).mean() + (dpsi0 ** 2).mean()

    total_loss = (
        LAM_PDE * loss_pde
        + LAM_BC * loss_bc
        + LAM_NORM * loss_norm
        + LAM_SYM * loss_sym
        + LAM_RAYLEIGH * loss_rayleigh
    )

    metrics = {
        'pde': loss_pde,
        'bc': loss_bc,
        'norm': loss_norm,
        'sym': loss_sym,
        'rayleigh': loss_rayleigh,
        'rayleigh_energy': rayleigh_energy.detach(),
        'norm_val': norm_val.detach(),
        'residual_rms': torch.sqrt((pde_residual**2).mean()).detach(),
    }
    return total_loss, metrics


model = GaussianEnvelopePINN(
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    activation=ACTIVATION,
    x_scale=max(abs(X_MIN), abs(X_MAX)),
).to(device)
energy = nn.Parameter(torch.tensor([0.80], dtype=torch.float32, device=device))

optimizer = torch.optim.AdamW(list(model.parameters()) + [energy], lr=LR_ADAM, weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)

print(f'\nModel parameters        : {model.count_parameters():,}')
print(f'Initial energy guess    : {energy.item():.6f}')
print('Analytic target energy  : 0.500000')
print('Training mode           : accuracy-first (AdamW + symmetry + Rayleigh + L-BFGS)')

Running on: cpu

Hyperparameters:
  Domain                  : [-7.0, 7.0]
  Collocation             : 3072 Sobol points / epoch
  Adam epochs             : 7000
  L-BFGS refinement       : 350
  Architecture            : 5 hidden layers x 96 units (gelu)
  Loss weights            : λ_pde=1.0  λ_bc=5.0  λ_norm=25.0  λ_sym=10.0  λ_ray=2.0

Model parameters        : 37,537
Initial energy guess    : 0.800000
Analytic target energy  : 0.500000
Training mode           : accuracy-first (AdamW + symmetry + Rayleigh + L-BFGS)

# ── Ground-State Training Loop ─────────────────────────────────────────────
# Objective:
#   min_{θ,E}  λ_pde L_pde + λ_bc L_bc + λ_norm L_norm + λ_sym L_sym + λ_ray L_ray
#
# The accuracy-first additions are:
#   • Sobol collocation for lower-discrepancy PDE coverage
#   • parity regularization for the even ground state
#   • Rayleigh consistency regularization for energy stabilization
#   • optional L-BFGS refinement after Adam converges

history = {
    'epoch': [],
    'total': [],
    'pde': [],
    'bc': [],
    'norm': [],
    'sym': [],
    'rayleigh': [],
    'energy': [],
    'rayleigh_energy': [],
    'residual_rms': [],
}

log_every = 250 if RUN_PROFILE == 'demo' else 500

for epoch in range(1, N_EPOCHS + 1):
    optimizer.zero_grad()
    x_col = sample_collocation_points(N_COLLOCATION)
    loss, metrics = compute_losses(model, energy, x_col)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(list(model.parameters()) + [energy], max_norm=1.0)
    optimizer.step()
    scheduler.step()

    if epoch % log_every == 0 or epoch == 1 or epoch == N_EPOCHS:
        history['epoch'].append(epoch)
        history['total'].append(loss.item())
        history['pde'].append(metrics['pde'].item())
        history['bc'].append(metrics['bc'].item())
        history['norm'].append(metrics['norm'].item())
        history['sym'].append(metrics['sym'].item())
        history['rayleigh'].append(metrics['rayleigh'].item())
        history['energy'].append(energy.item())
        history['rayleigh_energy'].append(metrics['rayleigh_energy'].item())
        history['residual_rms'].append(metrics['residual_rms'].item())
        print(
            f'Epoch {epoch:5d} | '
            f'L={loss.item():.3e} | '
            f'L_pde={metrics["pde"].item():.3e} | '
            f'L_norm={metrics["norm"].item():.3e} | '
            f'L_sym={metrics["sym"].item():.3e} | '
            f'E={energy.item():.6f} | '
            f'E_R={metrics["rayleigh_energy"].item():.6f}'
        )

if GROUND_STATE_LBFGS_STEPS > 0:
    print('\nStarting L-BFGS refinement on a fixed high-resolution collocation grid...')
    x_lbfgs = sample_collocation_points(max(2 * N_COLLOCATION, 4096))
    lbfgs = torch.optim.LBFGS(
        list(model.parameters()) + [energy],
        lr=LBFGS_LR,
        max_iter=GROUND_STATE_LBFGS_STEPS,
        history_size=50,
        tolerance_grad=1e-10,
        tolerance_change=1e-12,
        line_search_fn='strong_wolfe',
    )
    lbfgs_state = {}

    def closure():
        lbfgs.zero_grad()
        loss_lbfgs, metrics_lbfgs = compute_losses(model, energy, x_lbfgs)
        loss_lbfgs.backward()
        lbfgs_state['loss'] = loss_lbfgs.item()
        for key, value in metrics_lbfgs.items():
            lbfgs_state[key] = value.item() if torch.is_tensor(value) else value
        return loss_lbfgs

    lbfgs.step(closure)
    history['epoch'].append(N_EPOCHS + GROUND_STATE_LBFGS_STEPS)
    history['total'].append(lbfgs_state['loss'])
    history['pde'].append(lbfgs_state['pde'])
    history['bc'].append(lbfgs_state['bc'])
    history['norm'].append(lbfgs_state['norm'])
    history['sym'].append(lbfgs_state['sym'])
    history['rayleigh'].append(lbfgs_state['rayleigh'])
    history['energy'].append(energy.item())
    history['rayleigh_energy'].append(lbfgs_state['rayleigh_energy'])
    history['residual_rms'].append(lbfgs_state['residual_rms'])
    print(
        f'L-BFGS final | L={lbfgs_state["loss"]:.3e} | '
        f'L_pde={lbfgs_state["pde"]:.3e} | '
        f'L_norm={lbfgs_state["norm"]:.3e} | '
        f'L_sym={lbfgs_state["sym"]:.3e} | '
        f'E={energy.item():.6f} | '
        f'E_R={lbfgs_state["rayleigh_energy"]:.6f}'
    )

print(f'\nFinal predicted E0 = {energy.item():.8f}   |ΔE| = {abs(energy.item() - 0.5):.3e}')

fig, axes = plt.subplots(1, 3, figsize=(16, 4.4), facecolor='#0d1117')
for ax in axes:
    ax.set_facecolor('#161b22')
    ax.tick_params(colors='#8b949e')
    for spine in ax.spines.values():
        spine.set_edgecolor('#30363d')

ep = history['epoch']
axes[0].semilogy(ep, history['pde'], label='$\\mathcal{L}_{\\mathrm{pde}}$', color='#58a6ff', lw=2)
axes[0].semilogy(ep, history['norm'], label='$\\mathcal{L}_{\\mathrm{norm}}$', color='#d2a8ff', lw=2)
axes[0].semilogy(ep, history['sym'], label='$\\mathcal{L}_{\\mathrm{sym}}$', color='#ffa657', lw=2)
axes[0].semilogy(ep, history['total'], label='$\\mathcal{L}_{\\mathrm{total}}$', color='#e6edf3', lw=2, ls='--')
axes[0].set_xlabel('Epoch', color='#8b949e')
axes[0].set_ylabel('Loss (log scale)', color='#8b949e')
axes[0].set_title('Accuracy-First Loss Decomposition', color='#e6edf3')
axes[0].legend(facecolor='#21262d', edgecolor='#30363d', labelcolor='#e6edf3')

axes[1].plot(ep, history['energy'], color='#3fb950', lw=2.5, label='Learned $E_0$')
axes[1].plot(ep, history['rayleigh_energy'], color='#ffa657', lw=2.0, ls='--', label='Rayleigh $E_R$')
axes[1].axhline(0.5, color='#ff7b72', lw=1.5, ls=':', label='Exact $E_0=0.5$')
axes[1].set_xlabel('Epoch', color='#8b949e')
axes[1].set_ylabel('Energy', color='#8b949e')
axes[1].set_title('Eigenvalue and Rayleigh Consistency', color='#e6edf3')
axes[1].legend(facecolor='#21262d', edgecolor='#30363d', labelcolor='#e6edf3')

axes[2].semilogy(ep, history['residual_rms'], color='#79c0ff', lw=2.5)
axes[2].set_xlabel('Epoch', color='#8b949e')
axes[2].set_ylabel('RMS residual', color='#8b949e')
axes[2].set_title('Schrödinger Residual RMS', color='#e6edf3')

plt.tight_layout()
plt.savefig('../outputs/qho_training_convergence.png', dpi=160, bbox_inches='tight', facecolor='#0d1117')
plt.show()

Epoch     1 | L=2.743e+01 | L_pde=2.962e-08 | L_norm=1.000e+00 | L_sym=1.216e-07 | E=0.800500 | E_R=1.901898
Epoch   500 | L=2.811e-02 | L_pde=2.397e-02 | L_norm=1.226e-04 | L_sym=4.382e-09 | E=0.646114 | E_R=0.623065
Epoch  1000 | L=2.849e-02 | L_pde=2.238e-02 | L_norm=2.439e-04 | L_sym=2.330e-08 | E=0.614944 | E_R=0.612376
Epoch  1500 | L=2.449e-02 | L_pde=1.769e-02 | L_norm=2.684e-04 | L_sym=6.410e-08 | E=0.598186 | E_R=0.591605
Epoch  2000 | L=1.050e-02 | L_pde=8.818e-03 | L_norm=4.953e-05 | L_sym=6.539e-08 | E=0.558816 | E_R=0.544267
Epoch  2500 | L=6.840e-03 | L_pde=4.147e-03 | L_norm=1.077e-04 | L_sym=1.375e-07 | E=0.516200 | E_R=0.515870
Epoch  3000 | L=4.628e-03 | L_pde=3.783e-03 | L_norm=3.374e-05 | L_sym=1.487e-07 | E=0.514466 | E_R=0.514332
Epoch  3500 | L=3.134e-03 | L_pde=3.038e-03 | L_norm=3.661e-06 | L_sym=4.414e-07 | E=0.511602 | E_R=0.511393
Epoch  4000 | L=2.563e-04 | L_pde=2.360e-04 | L_norm=2.274e-10 | L_sym=2.024e-06 | E=0.500983 | E_R=0.500808
Epoch  4500 | L=5.650e-05 | L_pde=5.617e-05 | L_norm=4.864e-12 | L_sym=3.330e-08 | E=0.500137 | E_R=0.500132
Epoch  5000 | L=2.414e-05 | L_pde=2.401e-05 | L_norm=6.004e-13 | L_sym=1.346e-08 | E=0.500056 | E_R=0.500055
Epoch  5500 | L=1.170e-05 | L_pde=1.166e-05 | L_norm=8.882e-14 | L_sym=4.409e-09 | E=0.500026 | E_R=0.500026
Epoch  6000 | L=7.296e-06 | L_pde=7.277e-06 | L_norm=1.421e-14 | L_sym=1.867e-09 | E=0.500016 | E_R=0.500015
Epoch  6500 | L=5.912e-06 | L_pde=5.899e-06 | L_norm=1.279e-13 | L_sym=1.331e-09 | E=0.500015 | E_R=0.500012
Epoch  7000 | L=5.704e-06 | L_pde=5.692e-06 | L_norm=1.421e-14 | L_sym=1.273e-09 | E=0.500015 | E_R=0.500012

Starting L-BFGS refinement on a fixed high-resolution collocation grid...
L-BFGS final | L=5.705e-06 | L_pde=5.692e-06 | L_norm=6.963e-13 | L_sym=1.273e-09 | E=0.500015 | E_R=0.500012

Final predicted E0 = 0.50001526   |ΔE| = 1.526e-05

## 2. Real-World Data Anchors

> The study is intentionally CPU-feasible, but the underlying physics is not abstract. The harmonic approximation is the local model behind measurable vibrational modes in molecular spectroscopy, infrared sensing, and related quantum characterization workflows.

### Why the Dataset Matters

. The file `data/molecular_vibrational_anchors.csv` contains real system labels, representative vibrational wavenumbers, and zero-point energies.

. These anchors are not training supervision. They are interpretive anchors that explain why near-analytic eigenstate recovery matters in real measurement settings where the local potential is approximately quadratic and the quality of the state estimate governs downstream interpretation.

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

axes[0, 0].plot(qho_loss['epoch'], qho_loss['total_loss'], color=PALETTE['blue'], label='total')
axes[0, 0].plot(qho_loss['epoch'], qho_loss['pde_loss'], color=PALETTE['teal'], label='pde')
axes[0, 0].plot(qho_loss['epoch'], qho_loss['bc_loss'], color=PALETTE['gold'], label='bc')
axes[0, 0].set_yscale('log')
axes[0, 0].set_title('Optimization History')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].legend()

axes[0, 1].bar(qho_activation['activation'], qho_activation['rel_l2_error'], color=PALETTE['teal'])
axes[0, 1].set_title('Activation Ablation')
axes[0, 1].set_ylabel('Relative L2 error')

axes[1, 0].plot(qho_depth['n_layers'], qho_depth['rel_l2_error'], marker='o', color=PALETTE['gold'])
axes[1, 0].set_title('Depth Sensitivity')
axes[1, 0].set_xlabel('Number of hidden layers')
axes[1, 0].set_ylabel('Relative L2 error')

axes[1, 1].plot(qho_collocation['n_collocation'], qho_collocation['rel_l2_error'], marker='o', color=PALETTE['red'])
axes[1, 1].set_title('Collocation Sensitivity')
axes[1, 1].set_xlabel('Collocation points')
axes[1, 1].set_ylabel('Relative L2 error')

plt.tight_layout()

display(qho_benchmark.round(6))

image_specs = [
    ('Ground-state analysis', 'qho_ground_state_analysis.png'),
    ('Training convergence', 'qho_training_convergence.png'),
    ('Excited-state recovery', 'qho_excited_states.png'),
    ('Uncertainty consistency', 'qho_uncertainty_principle.png'),
]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
for axis, (title, image_name) in zip(axes.ravel(), image_specs):
    axis.imshow(load_png(image_name))
    axis.set_title(title)
    axis.axis('off')
plt.tight_layout()

best_rows = qho_benchmark[['n', 'E_pinn', 'E_exact', 'delta_E', 'rel_l2', 'sigma_x_sigma_p']].copy()
display(best_rows.round(6))

supplementary_specs = [
    ('Analytic eigenstates', 'qho_analytic_eigenstates.png'),
    ('State overlap matrix', 'qho_overlap_matrix.png'),
    ('Ablation summary', 'qho_ablation.png'),
    ('Activation comparison', 'qho_activation_comparison.png'),
]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
for axis, (title, image_name) in zip(axes.ravel(), supplementary_specs):
    axis.imshow(load_png(image_name))
    axis.set_title(title)
    axis.axis('off')
plt.tight_layout()

summary_table = pd.DataFrame({
    'headline_metric': ['rel-L2', 'Linf', 'energy error', 'overlap^2'],
    'value': [
        qho_summary['rel_l2'],
        qho_summary['linf'],
        qho_summary['energy_abs_error'],
        qho_summary['overlap_sq'],
    ],
})
display(summary_table.round(8))

Component	Vanilla PINN	This work
Network head	Unconstrained scalar	Hard Gaussian envelope
Eigenvalue	Fixed or ignored	Jointly learned
Symmetry	Emergent (unreliable)	Parity-aware regularization
Energy–state consistency	Absent	Rayleigh-quotient term

	system	domain	wavenumber_cm^-1	zero_point_energy_eV	source_note
0	H2 stretch	molecular spectroscopy	4401.2	0.272839	representative gas-phase vibrational frequency
1	N2 stretch	molecular spectroscopy	2358.6	0.146208	representative gas-phase vibrational frequency
2	CO stretch	infrared sensing and astrochemistry	2169.8	0.134511	representative gas-phase vibrational frequency
3	HCl stretch	laboratory and atmospheric spectroscopy	2885.9	0.178903	representative gas-phase vibrational frequency

	n	E_pinn	E_exact	delta_E	rel_l2	l_inf	sigma_x	sigma_p	sigma_x_sigma_p
0	0	0.500015	0.5	0.000015	0.001569	0.001253	0.707815	0.706344	0.499961
1	1	1.502434	1.5	0.002434	0.011747	0.008339	1.227998	1.221717	1.500266
2	2	2.566846	2.5	0.066846	0.036716	0.027703	1.607830	1.556461	2.502525
3	3	3.487327	3.5	0.012673	1.089362	0.722990	1.170451	0.932481	1.091424

	n	E_pinn	E_exact	delta_E	rel_l2	sigma_x_sigma_p
0	0	0.500015	0.5	0.000015	0.001569	0.499961
1	1	1.502434	1.5	0.002434	0.011747	1.500266
2	2	2.566846	2.5	0.066846	0.036716	2.502525
3	3	3.487327	3.5	0.012673	1.089362	1.091424

Adaptive Physics-Informed Eigenstate Recovery for Quantum Harmonic Systems¶

Abstract¶

Contributions to the Paper¶

§ 1. Modeling Formulation¶

Distinction From Vanilla Stationary PINNs¶

§ 2. Application Motivation¶

Role of the Anchor Dataset¶

3. Experimental Protocol and Evaluation Criteria¶

Evaluation Logic¶

4. Results and Visual Evidence¶

What to Emphasize in Discussion¶

5. Paper-Level Interpretation¶

What This Section Contributes¶

Scientific Relevance¶

Limitations Worth Stating Explicitly¶

	artifact	value
0	Ground-state rel-L2	1.569336e-03
1	Ground-state energy error	1.525879e-05
2	Squared overlap	0.99999754
3	Active interpreter	/Users/mohuyn/miniforge3/envs/qaoa/bin/python

	metric	value
0	Relative L2 error	0.001569
1	Linf error	0.001253
2	Energy abs. error	0.000015
3	Rayleigh gap	0.000004

	headline_metric	value
0	rel-L2	0.001569
1	Linf	0.001253
2	energy error	0.000015
3	overlap^2	0.999998