Skip to main content

nereids_fitting/
poisson.rs

1//! Poisson-likelihood optimizer for low-count neutron data (transmission
2//! path).
3//!
4//! Minimizes the single-arm Poisson negative log-likelihood
5//!
6//! ```text
7//! L(θ) = Σᵢ [y_model(θ)ᵢ − y_obs,ᵢ · ln(y_model(θ)ᵢ)]
8//! ```
9//!
10//! using a projected damped Gauss-Newton / Fisher optimizer with
11//! backtracking line search and finite-difference fallback.
12//!
13//! **Scope note.**  In the current pipeline this solver is only reached
14//! for the **transmission + PoissonKL** path (via
15//! `crate::transmission_model::TransmissionKLBackgroundModel`).  The
16//! **counts** path uses the joint-Poisson conditional-binomial-deviance
17//! solver in [`crate::joint_poisson`] (memo 35 §P1/§P2), which replaces
18//! the older fixed-flux counts NLL that lived here before the P2.2 /
19//! counts-KL collapse.  The helpers [`CountsModel`] and
20//! [`CountsBackgroundScaleModel`] exposed from this module are retained
21//! for the Fisher-info research helper
22//! [`crate::lm`]-side `evaluate_jacobian_and_fisher` and the spatial-
23//! regularization prototype scripts (Epic #394); they are not part of
24//! the production fit path.
25//!
26//! ## TRINIDI Reference
27//! - `trinidi/reconstruct.py` — Poisson NLL and APGM optimizer
28
29use nereids_core::constants::{PIVOT_FLOOR, POISSON_EPSILON};
30
31use crate::error::FittingError;
32use crate::lm::{FitModel, FlatMatrix};
33use crate::parameters::{FitParameter, ParameterSet};
34
35/// Configuration for the Poisson optimizer.
36#[derive(Debug, Clone)]
37pub struct PoissonConfig {
38    /// Maximum number of iterations.
39    pub max_iter: usize,
40    /// Step size for finite-difference gradient.
41    pub fd_step: f64,
42    /// Initial step size for line search.
43    pub step_size: f64,
44    /// Convergence tolerance used for both parameter displacement (L2 norm of step)
45    /// and gradient-norm convergence checks in `poisson_fit`.
46    pub tol_param: f64,
47    /// Armijo line search parameter (sufficient decrease).
48    pub armijo_c: f64,
49    /// Line search backtracking factor.
50    pub backtrack: f64,
51    /// Relative diagonal damping for analytical Gauss-Newton / Fisher steps.
52    pub gauss_newton_lambda: f64,
53    /// History size for the finite-difference L-BFGS fallback.
54    pub lbfgs_history: usize,
55    /// Whether to compute the Fisher covariance matrix (and uncertainties)
56    /// after convergence.  Set to `false` for per-pixel spatial mapping
57    /// when only densities are needed, avoiding extra model evaluations.
58    pub compute_covariance: bool,
59}
60
61impl Default for PoissonConfig {
62    fn default() -> Self {
63        Self {
64            max_iter: 200,
65            fd_step: 1e-7,
66            step_size: 1.0,
67            tol_param: 1e-8,
68            armijo_c: 1e-4,
69            backtrack: 0.5,
70            gauss_newton_lambda: 1e-3,
71            lbfgs_history: 8,
72            compute_covariance: true,
73        }
74    }
75}
76
77/// Result of Poisson-likelihood optimization.
78#[derive(Debug, Clone)]
79pub struct PoissonResult {
80    /// Final negative log-likelihood.
81    pub nll: f64,
82    /// Number of iterations taken.
83    pub iterations: usize,
84    /// Whether the optimizer converged.
85    pub converged: bool,
86    /// Final parameter values (all parameters, including fixed).
87    pub params: Vec<f64>,
88    /// Local covariance estimate from the inverse Fisher information matrix
89    /// at the converged parameters: `F⁻¹ = (J^T H J)⁻¹` where
90    /// `H = diag(obs/model²)` is the Poisson Hessian.
91    ///
92    /// This is a local curvature estimate, NOT a Bayesian posterior.
93    /// When an analytical Jacobian is available, it is used directly.
94    /// Otherwise a finite-difference Jacobian is computed as fallback.
95    /// `None` when the fit did not converge, the Fisher matrix is
96    /// singular, or covariance computation is disabled via config.
97    pub covariance: Option<FlatMatrix>,
98    /// Standard errors of free parameters: `√diag(F⁻¹)`.
99    /// `None` when covariance is not available.
100    pub uncertainties: Option<Vec<f64>>,
101}
102
103/// Compute Poisson negative log-likelihood.
104///
105/// NLL = Σᵢ [y_model - y_obs · ln(y_model)]
106///
107/// #109.2: For y_model ≤ epsilon, use a smooth C¹ quadratic extrapolation
108/// instead of a hard 1e30 penalty.  This keeps the NLL and its gradient
109/// continuous, so gradient-based optimizers (projected gradient, L-BFGS)
110/// can smoothly steer back into the feasible region rather than hitting
111/// a discontinuous cliff that stalls the line search.
112fn poisson_nll(y_obs: &[f64], y_model: &[f64]) -> f64 {
113    y_obs
114        .iter()
115        .zip(y_model.iter())
116        .map(|(&obs, &mdl)| poisson_nll_term(obs, mdl))
117        .sum()
118}
119
120/// Single-bin Poisson NLL with smooth extrapolation for mdl <= epsilon.
121///
122/// For mdl > 0: NLL = mdl - obs * ln(mdl)
123/// For mdl <= epsilon: quadratic Taylor expansion about epsilon,
124///   NLL(ε) + NLL'(ε)·(mdl−ε) + ½·NLL''(ε)·(mdl−ε)²
125/// where NLL'(x) = 1 − obs/x and NLL''(x) = obs/x².
126///
127/// Since delta = ε − mdl ≥ 0, this becomes:
128///   NLL(ε) − NLL'(ε)·delta + ½·NLL''(ε)·delta²
129///
130/// When obs == 0, the exact Hessian obs/ε² vanishes, leaving only a linear
131/// term that decreases without bound as mdl → −∞.  This can cause the
132/// optimizer to diverge.  We impose a minimum curvature of 1/ε so the
133/// quadratic penalty still curves upward for negative predictions.
134#[inline]
135fn poisson_nll_term(obs: f64, mdl: f64) -> f64 {
136    // #125.3: Negative observed counts would produce wrong-signed NLL terms.
137    // Release builds skip this check; callers must ensure non-negative counts. See #125 item 3.
138    debug_assert!(
139        obs.is_finite() && obs >= 0.0,
140        "poisson_nll_term: obs must be finite and >= 0, got {obs}"
141    );
142    if mdl > POISSON_EPSILON {
143        mdl - obs * mdl.ln()
144    } else {
145        let eps = POISSON_EPSILON;
146        let nll_eps = eps - obs * eps.ln();
147        let grad_eps = 1.0 - obs / eps;
148        // Minimum curvature 1/eps ensures the penalty grows quadratically
149        // even when obs == 0 (where the exact Hessian obs/eps^2 vanishes).
150        let hess_eps = if obs > 0.0 {
151            obs / (eps * eps)
152        } else {
153            1.0 / eps
154        };
155        let delta = eps - mdl;
156        // Taylor expansion: f(eps) + f'(eps)*(mdl - eps) + 0.5*f''(eps)*(mdl - eps)^2
157        // Since (mdl - eps) = -delta, the linear term flips sign.
158        nll_eps - grad_eps * delta + 0.5 * hess_eps * delta * delta
159    }
160}
161
162/// Per-bin Poisson NLL weight: ∂f(obs, mdl)/∂mdl.
163///
164/// For mdl > ε: w = 1 - obs/mdl
165/// For mdl ≤ ε: derivative of the smooth quadratic extrapolation,
166///   w = grad_eps - hess_eps · (ε - mdl), continuous at boundary.
167#[inline]
168fn poisson_nll_weight(obs: f64, mdl: f64) -> f64 {
169    if mdl > POISSON_EPSILON {
170        1.0 - obs / mdl
171    } else {
172        let eps = POISSON_EPSILON;
173        let grad_eps = 1.0 - obs / eps;
174        let hess_eps = if obs > 0.0 {
175            obs / (eps * eps)
176        } else {
177            1.0 / eps
178        };
179        grad_eps - hess_eps * (eps - mdl)
180    }
181}
182
183/// Per-bin Poisson NLL curvature: ∂²f(obs, mdl)/∂mdl².
184///
185/// For mdl > ε: h = obs / mdl²
186/// For mdl ≤ ε: curvature of the smooth quadratic extrapolation.
187#[inline]
188fn poisson_nll_curvature(obs: f64, mdl: f64) -> f64 {
189    if mdl > POISSON_EPSILON {
190        obs / (mdl * mdl)
191    } else {
192        let eps = POISSON_EPSILON;
193        if obs > 0.0 {
194            obs / (eps * eps)
195        } else {
196            1.0 / eps
197        }
198    }
199}
200
201/// Analytical first/second-order information for the Poisson objective.
202#[derive(Debug)]
203struct AnalyticalStepData {
204    /// Gradient of the Poisson NLL: grad = J^T · w.
205    grad: Vec<f64>,
206    /// Full Gauss-Newton / Fisher curvature approximation: J^T H J.
207    fisher: FlatMatrix,
208}
209
210/// Compute gradient and Gauss-Newton / Fisher curvature of the Poisson NLL
211/// using the analytical Jacobian.
212///
213/// `grad_j = Σᵢ wᵢ · J_{i,j}` where `wᵢ = ∂NLL/∂y_model_i`
214/// and `J_{i,j} = ∂y_model_i/∂θⱼ` from `model.analytical_jacobian()`.
215///
216/// The curvature uses the Poisson Hessian with respect to the model output:
217/// `fisher_{j,k} = Σᵢ hᵢ · J_{i,j} · J_{i,k}` where
218/// `hᵢ = ∂²NLL/∂y_model_i²`.
219///
220/// Returns `Some(step_data)` if the model provides an analytical Jacobian,
221/// `None` otherwise (caller should fall back to finite differences).
222fn compute_analytical_step_data(
223    model: &dyn FitModel,
224    params: &ParameterSet,
225    y_obs: &[f64],
226    y_model: &[f64],
227    all_vals_buf: &mut Vec<f64>,
228    free_idx_buf: &mut Vec<usize>,
229) -> Option<AnalyticalStepData> {
230    params.all_values_into(all_vals_buf);
231    params.free_indices_into(free_idx_buf);
232    let jac = model.analytical_jacobian(all_vals_buf, free_idx_buf, y_model)?;
233    let n_e = y_obs.len();
234    let n_free = free_idx_buf.len();
235    let mut grad = vec![0.0f64; n_free];
236    let mut fisher = FlatMatrix::zeros(n_free, n_free);
237    for i in 0..n_e {
238        let w = poisson_nll_weight(y_obs[i], y_model[i]);
239        let h = poisson_nll_curvature(y_obs[i], y_model[i]);
240        for (g, j) in grad.iter_mut().zip(0..n_free) {
241            let jij = jac.get(i, j);
242            *g += w * jij;
243            for k in 0..n_free {
244                *fisher.get_mut(j, k) += h * jij * jac.get(i, k);
245            }
246        }
247    }
248    Some(AnalyticalStepData { grad, fisher })
249}
250
251/// Compute gradient of Poisson NLL by finite differences.
252///
253/// `all_vals_buf` is a reusable scratch buffer for `params.all_values_into()`,
254/// avoiding a fresh allocation on every `model.evaluate()` call inside the
255/// per-parameter FD loop (N_free+1 allocations saved per gradient call).
256///
257/// `free_idx_buf` is a scratch buffer for `params.free_indices_into()`, reused
258/// across iterations to avoid per-gradient allocation.
259fn compute_gradient(
260    model: &dyn FitModel,
261    params: &mut ParameterSet,
262    y_obs: &[f64],
263    fd_step: f64,
264    all_vals_buf: &mut Vec<f64>,
265    free_idx_buf: &mut Vec<usize>,
266) -> Result<Vec<f64>, FittingError> {
267    params.all_values_into(all_vals_buf);
268    let base_model = model.evaluate(all_vals_buf)?;
269    let base_nll = poisson_nll(y_obs, &base_model);
270
271    params.free_indices_into(free_idx_buf);
272    let mut grad = vec![0.0; free_idx_buf.len()];
273
274    for (j, &idx) in free_idx_buf.iter().enumerate() {
275        let original = params.params[idx].value;
276        let step = fd_step * (1.0 + original.abs());
277
278        params.params[idx].value = original + step;
279        params.params[idx].clamp();
280        let mut actual_step = params.params[idx].value - original;
281
282        // #112: If the forward step is blocked by an upper bound, try the
283        // backward step so the gradient component is not frozen at zero.
284        if actual_step.abs() < PIVOT_FLOOR {
285            params.params[idx].value = original - step;
286            params.params[idx].clamp();
287            actual_step = params.params[idx].value - original;
288            if actual_step.abs() < PIVOT_FLOOR {
289                // Truly stuck at a point constraint — skip this parameter.
290                params.params[idx].value = original;
291                continue;
292            }
293        }
294
295        params.all_values_into(all_vals_buf);
296        let perturbed_model = match model.evaluate(all_vals_buf) {
297            Ok(v) => v,
298            Err(_) => {
299                params.params[idx].value = original;
300                continue;
301            }
302        };
303        let perturbed_nll = poisson_nll(y_obs, &perturbed_model);
304        params.params[idx].value = original;
305
306        grad[j] = (perturbed_nll - base_nll) / actual_step;
307    }
308
309    Ok(grad)
310}
311
312fn normalized_step_norm(
313    old_free: &[f64],
314    new_free: &[f64],
315    params: &ParameterSet,
316    free_param_indices: &[usize],
317) -> f64 {
318    old_free
319        .iter()
320        .zip(new_free.iter())
321        .zip(free_param_indices.iter())
322        .map(|((&old, &new), &idx)| {
323            let range = params.params[idx].upper - params.params[idx].lower;
324            let scale = if range.is_finite() && range > 1e-10 {
325                range
326            } else {
327                old.abs().max(1e-3)
328            };
329            ((old - new) / scale).powi(2)
330        })
331        .sum::<f64>()
332        .sqrt()
333}
334
335fn is_bound_active(param: &FitParameter, grad: f64) -> bool {
336    let at_lower = param.lower.is_finite() && (param.value - param.lower).abs() <= PIVOT_FLOOR;
337    let at_upper = param.upper.is_finite() && (param.value - param.upper).abs() <= PIVOT_FLOOR;
338    (at_lower && grad > 0.0) || (at_upper && grad < 0.0)
339}
340
341fn inactive_free_positions(
342    params: &ParameterSet,
343    free_param_indices: &[usize],
344    grad: &[f64],
345) -> Vec<usize> {
346    free_param_indices
347        .iter()
348        .zip(grad.iter())
349        .enumerate()
350        .filter_map(|(pos, (&idx, &g))| (!is_bound_active(&params.params[idx], g)).then_some(pos))
351        .collect()
352}
353
354fn inactive_free_mask(
355    params: &ParameterSet,
356    free_param_indices: &[usize],
357    grad: &[f64],
358) -> Vec<bool> {
359    free_param_indices
360        .iter()
361        .zip(grad.iter())
362        .map(|(&idx, &g)| !is_bound_active(&params.params[idx], g))
363        .collect()
364}
365
366fn projected_gradient_norm(
367    params: &ParameterSet,
368    free_param_indices: &[usize],
369    grad: &[f64],
370) -> f64 {
371    free_param_indices
372        .iter()
373        .zip(grad.iter())
374        .map(|(&idx, &g)| {
375            if is_bound_active(&params.params[idx], g) {
376                0.0
377            } else {
378                g * g
379            }
380        })
381        .sum::<f64>()
382        .sqrt()
383}
384
385fn extract_submatrix(matrix: &FlatMatrix, positions: &[usize]) -> FlatMatrix {
386    let n = positions.len();
387    let mut sub = FlatMatrix::zeros(n, n);
388    for (row_out, &row_in) in positions.iter().enumerate() {
389        for (col_out, &col_in) in positions.iter().enumerate() {
390            *sub.get_mut(row_out, col_out) = matrix.get(row_in, col_in);
391        }
392    }
393    sub
394}
395
396#[derive(Debug, Clone)]
397struct LbfgsHistory {
398    s_list: Vec<Vec<f64>>,
399    y_list: Vec<Vec<f64>>,
400    max_pairs: usize,
401}
402
403impl LbfgsHistory {
404    fn new(max_pairs: usize) -> Self {
405        Self {
406            s_list: Vec::with_capacity(max_pairs),
407            y_list: Vec::with_capacity(max_pairs),
408            max_pairs,
409        }
410    }
411
412    fn clear(&mut self) {
413        self.s_list.clear();
414        self.y_list.clear();
415    }
416
417    fn update(&mut self, old_free: &[f64], new_free: &[f64], old_grad: &[f64], new_grad: &[f64]) {
418        if self.max_pairs == 0 {
419            return;
420        }
421        let s: Vec<f64> = new_free
422            .iter()
423            .zip(old_free.iter())
424            .map(|(&new, &old)| new - old)
425            .collect();
426        let y: Vec<f64> = new_grad
427            .iter()
428            .zip(old_grad.iter())
429            .map(|(&new, &old)| new - old)
430            .collect();
431        let sy = dot(&s, &y);
432        let s_norm = dot(&s, &s).sqrt();
433        let y_norm = dot(&y, &y).sqrt();
434        if sy <= 1e-12 * s_norm * y_norm.max(1.0) {
435            return;
436        }
437        if self.s_list.len() == self.max_pairs {
438            self.s_list.remove(0);
439            self.y_list.remove(0);
440        }
441        self.s_list.push(s);
442        self.y_list.push(y);
443    }
444
445    fn apply_on_positions(&self, grad: &[f64], positions: &[usize]) -> Option<Vec<f64>> {
446        if self.s_list.is_empty() || positions.is_empty() {
447            return None;
448        }
449
450        let mut q: Vec<f64> = positions.iter().map(|&pos| grad[pos]).collect();
451        let mut alpha = vec![0.0; self.s_list.len()];
452        let mut rho = vec![0.0; self.s_list.len()];
453        let mut used = vec![false; self.s_list.len()];
454
455        for i in (0..self.s_list.len()).rev() {
456            let s_sub = extract_positions(&self.s_list[i], positions);
457            let y_sub = extract_positions(&self.y_list[i], positions);
458            let sy = dot(&s_sub, &y_sub);
459            if sy <= 1e-12 {
460                continue;
461            }
462            rho[i] = 1.0 / sy;
463            used[i] = true;
464            alpha[i] = rho[i] * dot(&s_sub, &q);
465            axpy(&mut q, -alpha[i], &y_sub);
466        }
467
468        let gamma = used
469            .iter()
470            .enumerate()
471            .rev()
472            .find_map(|(i, &is_used)| {
473                if !is_used {
474                    return None;
475                }
476                let last_s = extract_positions(&self.s_list[i], positions);
477                let last_y = extract_positions(&self.y_list[i], positions);
478                let yy = dot(&last_y, &last_y);
479                (yy > 0.0).then_some(dot(&last_s, &last_y) / yy)
480            })
481            .unwrap_or(1.0);
482
483        let mut r: Vec<f64> = q.into_iter().map(|v| gamma * v).collect();
484        for i in 0..self.s_list.len() {
485            if !used[i] {
486                continue;
487            }
488            let s_sub = extract_positions(&self.s_list[i], positions);
489            let y_sub = extract_positions(&self.y_list[i], positions);
490            let beta = rho[i] * dot(&y_sub, &r);
491            axpy(&mut r, alpha[i] - beta, &s_sub);
492        }
493
494        let mut full = vec![0.0; grad.len()];
495        for (&pos, &value) in positions.iter().zip(r.iter()) {
496            full[pos] = value;
497        }
498        Some(full)
499    }
500}
501
502fn dot(a: &[f64], b: &[f64]) -> f64 {
503    a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum()
504}
505
506fn axpy(dst: &mut [f64], alpha: f64, src: &[f64]) {
507    for (d, &s) in dst.iter_mut().zip(src.iter()) {
508        *d += alpha * s;
509    }
510}
511
512fn extract_positions(values: &[f64], positions: &[usize]) -> Vec<f64> {
513    positions.iter().map(|&pos| values[pos]).collect()
514}
515
516fn parameter_scaled_gradient_direction(
517    params: &ParameterSet,
518    free_param_indices: &[usize],
519    grad: &[f64],
520) -> Vec<f64> {
521    grad.iter()
522        .zip(free_param_indices.iter())
523        .map(|(&g, &idx)| {
524            if is_bound_active(&params.params[idx], g) {
525                return 0.0;
526            }
527            let p = &params.params[idx];
528            let range = p.upper - p.lower;
529            if range.is_finite() && range > 1e-10 {
530                g * range * range
531            } else {
532                let scale = p.value.abs().max(1e-3);
533                g * scale * scale
534            }
535        })
536        .collect()
537}
538
539fn max_feasible_step(
540    params: &ParameterSet,
541    free_param_indices: &[usize],
542    old_free: &[f64],
543    search_dir: &[f64],
544) -> f64 {
545    let mut alpha_max = f64::INFINITY;
546    for ((&idx, &x), &d) in free_param_indices
547        .iter()
548        .zip(old_free.iter())
549        .zip(search_dir.iter())
550    {
551        if d.abs() <= PIVOT_FLOOR {
552            continue;
553        }
554        let p = &params.params[idx];
555        let candidate = if d > 0.0 && p.lower.is_finite() {
556            (x - p.lower) / d
557        } else if d < 0.0 && p.upper.is_finite() {
558            (p.upper - x) / (-d)
559        } else {
560            f64::INFINITY
561        };
562        alpha_max = alpha_max.min(candidate);
563    }
564    alpha_max.max(0.0)
565}
566
567enum LineSearchResult {
568    Accepted {
569        nll: f64,
570        y_model: Vec<f64>,
571        hit_boundary: bool,
572    },
573    Stagnated,
574    Failed,
575}
576
577const MAX_FACE_STEPS_PER_ITER: usize = 4;
578
579/// Backtracking line search with Armijo condition.
580///
581/// Backtracking line search with Armijo sufficient-decrease condition:
582/// try a step, reject NaN/Inf model outputs, check the Armijo sufficient-decrease
583/// condition, and backtrack if needed.
584///
585/// # Arguments
586/// * `model`        — Forward model (maps parameters -> predicted counts).
587/// * `params`       — Parameter set (modified in place on success).
588/// * `y_obs`        — Observed counts.
589/// * `old_free`     — Free parameter values before the step.
590/// * `search_dir`   — Search direction (gradient or preconditioned gradient).
591/// * `initial_alpha`— Initial step size.
592/// * `config`       — Optimizer configuration (backtrack factor, Armijo c).
593/// * `grad`         — Raw gradient (used for Armijo descent computation).
594/// * `nll`          — Current negative log-likelihood.
595/// * `all_vals_buf` — Scratch buffer for `params.all_values_into()`, reused
596///   across the up-to-50 backtracking iterations to avoid per-trial allocation.
597/// * `free_vals_buf`— Scratch buffer for `params.free_values_into()`.
598/// * `trial_free_buf` — Scratch buffer for the trial free-parameter vector,
599///   reused across backtracking iterations to avoid up to 50 allocations.
600///
601/// # Returns
602/// `Some((new_nll, y_model))` if a step was accepted, `None` if the line search
603/// exhausted all backtracking attempts. Returns the model output alongside NLL
604/// so the caller can cache it for the next analytical gradient computation.
605///
606/// # Failure contract
607///
608/// On `None` return (line search exhausted), `params` is restored to
609/// `old_free` before returning. Callers need not restore manually.
610// All 12 arguments are genuinely needed: 9 original + 3 scratch buffers that
611// avoid per-backtracking-iteration allocations inside the 50-trial loop.
612#[allow(clippy::too_many_arguments)]
613fn backtracking_line_search(
614    model: &dyn FitModel,
615    params: &mut ParameterSet,
616    y_obs: &[f64],
617    old_free: &[f64],
618    free_param_indices: &[usize],
619    search_dir: &[f64],
620    initial_alpha: f64,
621    config: &PoissonConfig,
622    grad: &[f64],
623    nll: f64,
624    all_vals_buf: &mut Vec<f64>,
625    free_vals_buf: &mut Vec<f64>,
626    trial_free_buf: &mut Vec<f64>,
627) -> LineSearchResult {
628    let alpha_max = max_feasible_step(params, free_param_indices, old_free, search_dir);
629    if alpha_max <= PIVOT_FLOOR {
630        params.set_free_values(old_free);
631        return LineSearchResult::Stagnated;
632    }
633    let mut alpha = initial_alpha.min(alpha_max);
634    for _ in 0..50 {
635        // Trial step along the feasible path: x_new = x - alpha * d, with
636        // alpha capped so inactive-subspace directions hit bounds exactly
637        // instead of relying on projection to distort the step.
638        trial_free_buf.clear();
639        for ((&idx, &v), &d) in free_param_indices
640            .iter()
641            .zip(old_free.iter())
642            .zip(search_dir.iter())
643        {
644            let p = &params.params[idx];
645            trial_free_buf.push((v - alpha * d).clamp(p.lower, p.upper));
646        }
647        params.set_free_values(trial_free_buf);
648
649        params.all_values_into(all_vals_buf);
650        let trial_model = match model.evaluate(all_vals_buf) {
651            Ok(v) => v,
652            Err(_) => {
653                alpha *= config.backtrack;
654                continue;
655            }
656        };
657
658        // #113: If the model produced NaN/Inf, reduce step size rather
659        // than accepting a garbage NLL.
660        if trial_model.iter().any(|v| !v.is_finite()) {
661            alpha *= config.backtrack;
662            continue;
663        }
664
665        let trial_nll = poisson_nll(y_obs, &trial_model);
666
667        // Armijo condition: f(x_new) <= f(x) - c * descent
668        params.free_values_into(free_vals_buf);
669        let step_norm = normalized_step_norm(old_free, free_vals_buf, params, free_param_indices);
670        let descent = grad
671            .iter()
672            .zip(old_free.iter())
673            .zip(free_vals_buf.iter())
674            .map(|((&g, &old), &new)| g * (old - new))
675            .sum::<f64>();
676
677        if trial_nll.is_finite() && trial_nll <= nll - config.armijo_c * descent {
678            return LineSearchResult::Accepted {
679                nll: trial_nll,
680                y_model: trial_model,
681                hit_boundary: alpha_max.is_finite()
682                    && (alpha_max - alpha).abs() <= 1e-12 * alpha_max.max(1.0),
683            };
684        }
685
686        let nll_delta = (trial_nll - nll).abs();
687        let nll_scale = trial_nll.abs().max(nll.abs()).max(1.0);
688        if trial_nll.is_finite()
689            && step_norm < config.tol_param
690            && nll_delta <= config.tol_param * nll_scale
691        {
692            params.set_free_values(old_free);
693            return LineSearchResult::Stagnated;
694        }
695
696        // Backtrack
697        alpha *= config.backtrack;
698        if alpha <= PIVOT_FLOOR {
699            break;
700        }
701    }
702    params.set_free_values(old_free);
703    LineSearchResult::Failed
704}
705
706/// Early return for all-fixed parameters: evaluate once and report.
707///
708/// Returns `Ok(Some(PoissonResult))` if all parameters are fixed (either a
709/// valid result with converged=true, or a non-finite NLL with converged=false).
710/// Returns `Ok(None)` if there are free parameters and optimization should
711/// proceed. Returns `Err(FittingError)` if model evaluation fails.
712fn try_early_return_fixed(
713    model: &dyn FitModel,
714    y_obs: &[f64],
715    params: &ParameterSet,
716) -> Result<Option<PoissonResult>, FittingError> {
717    if params.n_free() != 0 {
718        return Ok(None);
719    }
720    let y_model = model.evaluate(&params.all_values())?;
721    let nll = poisson_nll(y_obs, &y_model);
722    if !nll.is_finite() {
723        return Ok(Some(PoissonResult {
724            nll,
725            iterations: 0,
726            converged: false,
727            params: params.all_values(),
728            covariance: None,
729            uncertainties: None,
730        }));
731    }
732    Ok(Some(PoissonResult {
733        nll,
734        iterations: 0,
735        converged: true,
736        params: params.all_values(),
737        covariance: None,
738        uncertainties: None,
739    }))
740}
741
742/// Build the Poisson Fisher information matrix via finite-difference Jacobian.
743///
744/// Used as fallback when the model does not provide an analytical Jacobian
745/// (e.g., `TransmissionFitModel` without base_xs).  Returns `None` if any
746/// model evaluation fails during the FD perturbation.
747///
748/// Respects parameter bounds via `clamp()` and uses the actual clamped step
749/// size.  When a bound blocks the central-difference step in one direction,
750/// falls back to one-sided difference.
751fn compute_fd_fisher(
752    model: &dyn FitModel,
753    params: &mut ParameterSet,
754    y_obs: &[f64],
755    y_model: &[f64],
756    fd_step: f64,
757    all_vals_buf: &mut Vec<f64>,
758    free_idx_buf: &mut Vec<usize>,
759) -> Option<FlatMatrix> {
760    params.free_indices_into(free_idx_buf);
761    let n_free = free_idx_buf.len();
762    let n_e = y_obs.len();
763
764    let mut jac = FlatMatrix::zeros(n_e, n_free);
765    for (col, &fi) in free_idx_buf.iter().enumerate() {
766        let orig = params.params[fi].value;
767        let h = fd_step * (1.0 + orig.abs());
768
769        // Forward perturbation (clamped to bounds).
770        params.params[fi].value = orig + h;
771        params.params[fi].clamp();
772        let step_plus = params.params[fi].value - orig;
773
774        let y_plus = if step_plus.abs() > PIVOT_FLOOR {
775            params.all_values_into(all_vals_buf);
776            let y = match model.evaluate(all_vals_buf) {
777                Ok(v) => v,
778                Err(_) => {
779                    params.params[fi].value = orig;
780                    return None;
781                }
782            };
783            Some(y)
784        } else {
785            None
786        };
787
788        // Backward perturbation (clamped to bounds).
789        params.params[fi].value = orig - h;
790        params.params[fi].clamp();
791        let step_minus = params.params[fi].value - orig;
792
793        let y_minus = if step_minus.abs() > PIVOT_FLOOR {
794            params.all_values_into(all_vals_buf);
795            let y = match model.evaluate(all_vals_buf) {
796                Ok(v) => v,
797                Err(_) => {
798                    params.params[fi].value = orig;
799                    return None;
800                }
801            };
802            Some(y)
803        } else {
804            None
805        };
806
807        params.params[fi].value = orig;
808
809        // Central difference, or one-sided fallback at bounds.
810        match (&y_plus, &y_minus) {
811            (Some(yp), Some(ym)) => {
812                let denom = step_plus - step_minus;
813                for (i, (&vp, &vm)) in yp.iter().zip(ym.iter()).enumerate() {
814                    *jac.get_mut(i, col) = (vp - vm) / denom;
815                }
816            }
817            (Some(yp), None) => {
818                for (i, (&vp, &v0)) in yp.iter().zip(y_model.iter()).enumerate() {
819                    *jac.get_mut(i, col) = (vp - v0) / step_plus;
820                }
821            }
822            (None, Some(ym)) => {
823                for (i, (&v0, &vm)) in y_model.iter().zip(ym.iter()).enumerate() {
824                    *jac.get_mut(i, col) = (v0 - vm) / (-step_minus);
825                }
826            }
827            (None, None) => {
828                // Both directions blocked — Jacobian column stays zero.
829            }
830        }
831    }
832
833    let mut fisher = FlatMatrix::zeros(n_free, n_free);
834    for i in 0..n_e {
835        let h_i = poisson_nll_curvature(y_obs[i], y_model[i]);
836        for j in 0..n_free {
837            let jij = jac.get(i, j);
838            for k in 0..n_free {
839                *fisher.get_mut(j, k) += h_i * jij * jac.get(i, k);
840            }
841        }
842    }
843    Some(fisher)
844}
845
846/// Run Poisson-likelihood optimization using a projected KL optimizer.
847///
848/// Uses damped Gauss-Newton / Fisher steps when an analytical Jacobian is
849/// available, falling back to projected gradient descent otherwise. Both paths
850/// use backtracking line search with Armijo condition and projection onto
851/// parameter bounds after each step.
852///
853/// # Arguments
854/// * `model` — Forward model (maps parameters → predicted counts).
855/// * `y_obs` — Observed counts at each data point.
856/// * `params` — Parameter set (modified in place).
857/// * `config` — Optimizer configuration.
858///
859/// # Returns
860/// `Ok(PoissonResult)` with final NLL, parameters, and convergence status.
861/// `Err(FittingError)` if model evaluation fails at the initial point.
862/// Evaluation errors during line-search trials are treated as bad steps
863/// (backtrack), not fatal errors.
864pub fn poisson_fit(
865    model: &dyn FitModel,
866    y_obs: &[f64],
867    params: &mut ParameterSet,
868    config: &PoissonConfig,
869) -> Result<PoissonResult, FittingError> {
870    if let Some(result) = try_early_return_fixed(model, y_obs, params)? {
871        return Ok(result);
872    }
873
874    // Scratch buffers reused across the entire optimization loop to avoid
875    // per-iteration allocations inside compute_gradient (N_free+1 calls)
876    // and backtracking_line_search (up to 50 calls).
877    let mut all_vals_buf = Vec::with_capacity(params.params.len());
878    let mut free_vals_buf = Vec::with_capacity(params.n_free());
879    let mut old_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
880    let mut trial_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
881    let mut free_idx_buf: Vec<usize> = Vec::with_capacity(params.n_free());
882    let mut fd_history = LbfgsHistory::new(config.lbfgs_history);
883    let mut pending_fd_state: Option<(Vec<f64>, Vec<f64>, Vec<bool>)> = None;
884
885    params.all_values_into(&mut all_vals_buf);
886    let mut y_model = model.evaluate(&all_vals_buf)?;
887    let mut nll = poisson_nll(y_obs, &y_model);
888
889    // Guard: if the initial NLL is non-finite, bail out immediately rather
890    // than entering the optimization loop with garbage values.
891    if !nll.is_finite() {
892        return Ok(PoissonResult {
893            nll,
894            iterations: 0,
895            converged: false,
896            params: params.all_values(),
897            covariance: None,
898            uncertainties: None,
899        });
900    }
901
902    let mut converged = false;
903    let mut iter = 0;
904
905    'outer: for _ in 0..config.max_iter {
906        iter += 1;
907        let mut face_steps = 0usize;
908
909        loop {
910            // Compute gradient: try analytical (grad = J^T · w) first,
911            // fall back to finite differences if the model doesn't provide
912            // an analytical Jacobian.
913            let analytical_step = compute_analytical_step_data(
914                model,
915                params,
916                y_obs,
917                &y_model,
918                &mut all_vals_buf,
919                &mut free_idx_buf,
920            );
921            let grad = if let Some(ref analytical) = analytical_step {
922                analytical.grad.clone()
923            } else {
924                compute_gradient(
925                    model,
926                    params,
927                    y_obs,
928                    config.fd_step,
929                    &mut all_vals_buf,
930                    &mut free_idx_buf,
931                )?
932            };
933
934            params.free_indices_into(&mut free_idx_buf);
935
936            let using_fd = analytical_step.is_none();
937            if using_fd {
938                params.free_values_into(&mut free_vals_buf);
939                let current_mask = inactive_free_mask(params, &free_idx_buf, &grad);
940                if let Some((prev_free, prev_grad, prev_mask)) = pending_fd_state.take() {
941                    if prev_mask == current_mask {
942                        fd_history.update(&prev_free, &free_vals_buf, &prev_grad, &grad);
943                    } else {
944                        fd_history.clear();
945                    }
946                }
947                pending_fd_state = Some((free_vals_buf.clone(), grad.clone(), current_mask));
948            } else {
949                pending_fd_state.take();
950                fd_history.clear();
951            }
952
953            // Use projected-gradient optimality for bound-constrained problems.
954            let projected_grad_norm = projected_gradient_norm(params, &free_idx_buf, &grad);
955            if projected_grad_norm < config.tol_param {
956                converged = true;
957                break 'outer;
958            }
959
960            let (search_dir, initial_alpha): (Vec<f64>, f64) =
961                if let Some(ref analytical) = analytical_step {
962                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
963                    if inactive_positions.is_empty() {
964                        converged = true;
965                        break 'outer;
966                    }
967                    let reduced_fisher = extract_submatrix(&analytical.fisher, &inactive_positions);
968                    let reduced_grad: Vec<f64> =
969                        inactive_positions.iter().map(|&pos| grad[pos]).collect();
970                    let reduced_dir = crate::lm::solve_damped_system(
971                        &reduced_fisher,
972                        &reduced_grad,
973                        config.gauss_newton_lambda,
974                    )
975                    .unwrap_or_else(|| {
976                        reduced_grad
977                            .iter()
978                            .enumerate()
979                            .map(|(j, &g)| g / reduced_fisher.get(j, j).max(1e-12))
980                            .collect()
981                    });
982                    let mut dir = vec![0.0; grad.len()];
983                    for (&pos, &value) in inactive_positions.iter().zip(reduced_dir.iter()) {
984                        dir[pos] = value;
985                    }
986                    (dir, config.step_size)
987                } else {
988                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
989                    if inactive_positions.is_empty() {
990                        converged = true;
991                        break 'outer;
992                    }
993                    let used_history = !fd_history.s_list.is_empty();
994                    let mut dir = fd_history
995                        .apply_on_positions(&grad, &inactive_positions)
996                        .unwrap_or_else(|| {
997                            parameter_scaled_gradient_direction(params, &free_idx_buf, &grad)
998                        });
999                    let descent = dot(&grad, &dir);
1000                    if !descent.is_finite() || descent <= 0.0 {
1001                        dir = parameter_scaled_gradient_direction(params, &free_idx_buf, &grad);
1002                    }
1003                    if used_history && descent.is_finite() && descent > 0.0 {
1004                        (dir, config.step_size)
1005                    } else {
1006                        let search_norm: f64 = dir.iter().map(|d| d * d).sum::<f64>().sqrt();
1007                        (dir, config.step_size / search_norm.max(1.0))
1008                    }
1009                };
1010
1011            params.free_values_into(&mut free_vals_buf);
1012            old_free_buf.clear();
1013            old_free_buf.extend_from_slice(&free_vals_buf);
1014
1015            match backtracking_line_search(
1016                model,
1017                params,
1018                y_obs,
1019                &old_free_buf,
1020                &free_idx_buf,
1021                &search_dir,
1022                initial_alpha,
1023                config,
1024                &grad,
1025                nll,
1026                &mut all_vals_buf,
1027                &mut free_vals_buf,
1028                &mut trial_free_buf,
1029            ) {
1030                LineSearchResult::Accepted {
1031                    nll: new_nll,
1032                    y_model: new_y_model,
1033                    hit_boundary,
1034                } => {
1035                    if !using_fd {
1036                        pending_fd_state = None;
1037                    }
1038                    nll = new_nll;
1039                    y_model = new_y_model;
1040
1041                    if hit_boundary && face_steps < MAX_FACE_STEPS_PER_ITER {
1042                        face_steps += 1;
1043                        continue;
1044                    }
1045                }
1046                LineSearchResult::Stagnated => {
1047                    converged = true;
1048                    break 'outer;
1049                }
1050                LineSearchResult::Failed => {
1051                    break 'outer;
1052                }
1053            }
1054
1055            params.free_values_into(&mut free_vals_buf);
1056            let step_norm =
1057                normalized_step_norm(&old_free_buf, &free_vals_buf, params, &free_idx_buf);
1058            if step_norm < config.tol_param {
1059                converged = true;
1060                break 'outer;
1061            }
1062
1063            break;
1064        }
1065    }
1066
1067    // Compute local covariance from the inverse Fisher information matrix
1068    // at the converged parameters: cov = (J^T H J)^{-1}.
1069    // This is a local curvature estimate, not a Bayesian posterior.
1070    // Gated by config.compute_covariance to avoid extra evaluations when
1071    // the caller only needs densities (e.g., per-pixel spatial mapping).
1072    let (covariance, uncertainties) = if converged && config.compute_covariance {
1073        // Use the final y_model from the last accepted step rather than
1074        // re-evaluating (avoids extra model call and cannot turn a
1075        // successful fit into an error during post-processing).
1076
1077        // Build the Fisher information matrix J^T H J from the Jacobian at
1078        // the converged parameters.  Try the analytical Jacobian first; fall
1079        // back to finite differences if not available.
1080        let fisher_opt = if let Some(step_data) = compute_analytical_step_data(
1081            model,
1082            params,
1083            y_obs,
1084            &y_model,
1085            &mut all_vals_buf,
1086            &mut free_idx_buf,
1087        ) {
1088            Some(step_data.fisher)
1089        } else {
1090            // Build Fisher via FD Jacobian.
1091            compute_fd_fisher(
1092                model,
1093                params,
1094                y_obs,
1095                &y_model,
1096                config.fd_step,
1097                &mut all_vals_buf,
1098                &mut free_idx_buf,
1099            )
1100        };
1101
1102        if let Some(fisher) = fisher_opt {
1103            if let Some(cov) = crate::lm::invert_matrix(&fisher) {
1104                let n_free = cov.nrows;
1105                let unc: Vec<f64> = (0..n_free)
1106                    .map(|i| {
1107                        let d = cov.get(i, i);
1108                        if d.is_finite() && d > 0.0 {
1109                            d.sqrt()
1110                        } else {
1111                            f64::NAN
1112                        }
1113                    })
1114                    .collect();
1115                (Some(cov), Some(unc))
1116            } else {
1117                (None, None)
1118            }
1119        } else {
1120            (None, None)
1121        }
1122    } else {
1123        (None, None)
1124    };
1125
1126    Ok(PoissonResult {
1127        nll,
1128        iterations: iter,
1129        converged,
1130        params: params.all_values(),
1131        covariance,
1132        uncertainties,
1133    })
1134}
1135
1136/// Fixed-flux counts-domain forward model: `Y_model = flux × T_model(θ) + background`.
1137///
1138/// **Retained for the research Fisher helper, not for production fitting.**
1139/// The production counts-KL dispatch (`SolverConfig::PoissonKL` on
1140/// `InputData::Counts` / `InputData::CountsWithNuisance`) goes through
1141/// the joint-Poisson conditional-binomial-deviance path in
1142/// [`crate::joint_poisson`] per memo 35 §P1/§P2.  `CountsModel` and
1143/// [`CountsBackgroundScaleModel`] below are consumed only by
1144/// `nereids_pipeline::pipeline::evaluate_jacobian_and_fisher` (the
1145/// Fisher-info research helper used by the spatial-regularization
1146/// epic #394) and by this module's `#[cfg(test)]` tests.  They assume
1147/// the caller has pre-computed `flux = c · O` (i.e. `c` is baked into
1148/// `flux` — the convention that memo 35 §P1 flagged as error-prone for
1149/// end users, which is precisely why the production path no longer
1150/// uses this struct).
1151///
1152/// The `flux` and `background` slices must have the same length as the
1153/// transmission vector returned by the inner model.  In debug builds,
1154/// `evaluate()` asserts this invariant.
1155pub struct CountsModel<'a> {
1156    /// Underlying transmission model.
1157    pub transmission_model: &'a dyn FitModel,
1158    /// Incident flux (counts per bin in open beam, after normalization).
1159    pub flux: &'a [f64],
1160    /// Background counts per bin.
1161    pub background: &'a [f64],
1162    /// Total parameter count in the wrapped model.
1163    pub n_params: usize,
1164}
1165
1166impl<'a> FitModel for CountsModel<'a> {
1167    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1168        let transmission = self.transmission_model.evaluate(params)?;
1169        debug_assert_eq!(
1170            transmission.len(),
1171            self.flux.len(),
1172            "CountsModel: transmission length ({}) != flux length ({})",
1173            transmission.len(),
1174            self.flux.len(),
1175        );
1176        debug_assert_eq!(
1177            self.flux.len(),
1178            self.background.len(),
1179            "CountsModel: flux length ({}) != background length ({})",
1180            self.flux.len(),
1181            self.background.len(),
1182        );
1183        Ok(transmission
1184            .iter()
1185            .zip(self.flux.iter())
1186            .zip(self.background.iter())
1187            .map(|((&t, &f), &b)| f * t + b)
1188            .collect())
1189    }
1190
1191    /// Analytical Jacobian: ∂Y/∂θ = flux · ∂T_inner/∂θ.
1192    ///
1193    /// Background is constant w.r.t. θ and drops out.
1194    fn analytical_jacobian(
1195        &self,
1196        params: &[f64],
1197        free_param_indices: &[usize],
1198        y_current: &[f64],
1199    ) -> Option<FlatMatrix> {
1200        let n_e = y_current.len();
1201        // Recover inner transmission: T = (Y - background) / flux
1202        let t_inner: Vec<f64> = y_current
1203            .iter()
1204            .zip(self.flux.iter())
1205            .zip(self.background.iter())
1206            .map(|((&y, &f), &b)| if f.abs() > 1e-30 { (y - b) / f } else { 0.0 })
1207            .collect();
1208        let inner_jac =
1209            self.transmission_model
1210                .analytical_jacobian(params, free_param_indices, &t_inner)?;
1211        let n_free = free_param_indices.len();
1212        let mut jac = FlatMatrix::zeros(n_e, n_free);
1213        for i in 0..n_e {
1214            for j in 0..n_free {
1215                *jac.get_mut(i, j) = self.flux[i] * inner_jac.get(i, j);
1216            }
1217        }
1218        Some(jac)
1219    }
1220}
1221
1222// ── ForwardModel implementation for CountsModel (Phase 1) ────────────────
1223
1224impl<'a> crate::forward_model::ForwardModel for CountsModel<'a> {
1225    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1226        self.evaluate(params)
1227    }
1228
1229    // No analytical jacobian — uses finite differences (same as FitModel).
1230
1231    fn n_data(&self) -> usize {
1232        self.flux.len()
1233    }
1234
1235    fn n_params(&self) -> usize {
1236        self.n_params
1237    }
1238}
1239
1240/// Fixed-flux counts model with optional α₁ / α₂ nuisance scaling of
1241/// signal and detector background.
1242///
1243/// **Retained for the research Fisher helper, not for production fitting.**
1244/// See [`CountsModel`] for the scope note — the production counts-KL
1245/// dispatch does not use this struct; it is reached only from
1246/// `evaluate_jacobian_and_fisher` (Epic #394 spatial-regularization
1247/// prototype) and from this module's `#[cfg(test)]` tests.
1248///
1249/// Given a transmission model `T(θ)`, predicts:
1250///
1251///   Y(E) = α₁ · [Φ(E) · T(θ)] + α₂ · B(E)
1252///
1253/// where `α₁` and `α₂` are parameter-vector entries.
1254pub struct CountsBackgroundScaleModel<'a> {
1255    /// Underlying transmission model.
1256    pub transmission_model: &'a dyn FitModel,
1257    /// Incident flux spectrum.
1258    pub flux: &'a [f64],
1259    /// Detector background spectrum.
1260    pub background: &'a [f64],
1261    /// Index of α₁ in the parameter vector.
1262    pub alpha1_index: usize,
1263    /// Index of α₂ in the parameter vector.
1264    pub alpha2_index: usize,
1265    /// Total parameter count in the wrapped model.
1266    pub n_params: usize,
1267}
1268
1269impl<'a> FitModel for CountsBackgroundScaleModel<'a> {
1270    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1271        let transmission = self.transmission_model.evaluate(params)?;
1272        let alpha1 = params[self.alpha1_index];
1273        let alpha2 = params[self.alpha2_index];
1274        debug_assert_eq!(transmission.len(), self.flux.len());
1275        debug_assert_eq!(self.flux.len(), self.background.len());
1276        Ok(transmission
1277            .iter()
1278            .zip(self.flux.iter())
1279            .zip(self.background.iter())
1280            .map(|((&t, &f), &b)| alpha1 * f * t + alpha2 * b)
1281            .collect())
1282    }
1283
1284    fn analytical_jacobian(
1285        &self,
1286        params: &[f64],
1287        free_param_indices: &[usize],
1288        y_current: &[f64],
1289    ) -> Option<FlatMatrix> {
1290        let n_e = y_current.len();
1291        let n_free = free_param_indices.len();
1292        let alpha1 = params[self.alpha1_index];
1293        let alpha1_col = free_param_indices
1294            .iter()
1295            .position(|&i| i == self.alpha1_index);
1296        let alpha2_col = free_param_indices
1297            .iter()
1298            .position(|&i| i == self.alpha2_index);
1299        let inner_free: Vec<usize> = free_param_indices
1300            .iter()
1301            .copied()
1302            .filter(|&i| i != self.alpha1_index && i != self.alpha2_index)
1303            .collect();
1304
1305        // Evaluate the inner transmission model directly instead of
1306        // reconstructing from y_current — reconstruction via
1307        // (y - alpha2*b)/(alpha1*f) is undefined when alpha1 ≈ 0.
1308        let t_inner = match self.transmission_model.evaluate(params) {
1309            Ok(t) => t,
1310            Err(_) => return None,
1311        };
1312
1313        let inner_jac = if !inner_free.is_empty() {
1314            self.transmission_model
1315                .analytical_jacobian(params, &inner_free, &t_inner)
1316        } else {
1317            None
1318        };
1319
1320        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1321        if let Some(ref ij) = inner_jac {
1322            let mut inner_col = 0;
1323            for (col, &fp) in free_param_indices.iter().enumerate() {
1324                if fp == self.alpha1_index || fp == self.alpha2_index {
1325                    continue;
1326                }
1327                for row in 0..n_e {
1328                    *jacobian.get_mut(row, col) = alpha1 * self.flux[row] * ij.get(row, inner_col);
1329                }
1330                inner_col += 1;
1331            }
1332        } else if !inner_free.is_empty() {
1333            return None;
1334        }
1335
1336        if let Some(col) = alpha1_col {
1337            for (row, (&f, &t)) in self.flux.iter().zip(t_inner.iter()).enumerate() {
1338                *jacobian.get_mut(row, col) = f * t;
1339            }
1340        }
1341        if let Some(col) = alpha2_col {
1342            for (row, &bg) in self.background.iter().enumerate() {
1343                *jacobian.get_mut(row, col) = bg;
1344            }
1345        }
1346
1347        Some(jacobian)
1348    }
1349}
1350
1351impl<'a> crate::forward_model::ForwardModel for CountsBackgroundScaleModel<'a> {
1352    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1353        self.evaluate(params)
1354    }
1355
1356    fn n_data(&self) -> usize {
1357        self.flux.len()
1358    }
1359
1360    fn n_params(&self) -> usize {
1361        self.n_params
1362    }
1363}
1364
1365/// KL-compatible background model for transmission data.
1366///
1367/// Given a transmission model T_inner(θ), predicts:
1368///
1369///   T_out(E) = T_inner(E) + b₀ + b₁/√E
1370///
1371/// where b₀ and b₁ are the additive background parameters at indices
1372/// `b0_index` and `b1_index` in the parameter vector.
1373///
1374/// Unlike `NormalizedTransmissionModel` (which uses `Anorm * T + BackA +
1375/// BackB/√E + BackC√E` with 4 free parameters), this model:
1376/// - Has only 2 background parameters (b₀, b₁), reducing overfitting risk
1377/// - Constrains b₀, b₁ ≥ 0 via parameter bounds (physical: background
1378///   adds counts, never subtracts), ensuring T_out > 0 for valid Poisson NLL
1379/// - Does NOT multiply T_inner by a normalization factor — normalization
1380///   is handled separately (nuisance estimation for counts, or pre-processing
1381///   for transmission data)
1382///
1383/// ## Gradient
1384///
1385/// - ∂T_out/∂nₖ = ∂T_inner/∂nₖ = -σₖ(E)·T_inner(E)  (same as bare model)
1386/// - ∂T_out/∂b₀ = 1
1387/// - ∂T_out/∂b₁ = 1/√E
1388pub struct TransmissionKLBackgroundModel<'a> {
1389    /// Underlying transmission model (density parameters only).
1390    pub inner: &'a dyn FitModel,
1391    /// Precomputed 1/√E for each energy bin.
1392    pub inv_sqrt_energies: Vec<f64>,
1393    /// Index of b₀ (constant background) in the parameter vector.
1394    pub b0_index: usize,
1395    /// Index of b₁ (1/√E background) in the parameter vector.
1396    pub b1_index: usize,
1397    /// Total parameter count in the wrapped model.
1398    pub n_params: usize,
1399}
1400
1401impl<'a> FitModel for TransmissionKLBackgroundModel<'a> {
1402    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1403        let t_inner = self.inner.evaluate(params)?;
1404        let b0 = params[self.b0_index];
1405        let b1 = params[self.b1_index];
1406        Ok(t_inner
1407            .iter()
1408            .zip(self.inv_sqrt_energies.iter())
1409            .map(|(&t, &inv_sqrt_e)| t + b0 + b1 * inv_sqrt_e)
1410            .collect())
1411    }
1412
1413    fn analytical_jacobian(
1414        &self,
1415        params: &[f64],
1416        free_param_indices: &[usize],
1417        y_current: &[f64],
1418    ) -> Option<FlatMatrix> {
1419        let n_e = y_current.len();
1420        let n_free = free_param_indices.len();
1421
1422        // Identify which free params are background vs inner model.
1423        let b0_col = free_param_indices.iter().position(|&i| i == self.b0_index);
1424        let b1_col = free_param_indices.iter().position(|&i| i == self.b1_index);
1425
1426        // Inner model free params (those not b0 or b1).
1427        let inner_free: Vec<usize> = free_param_indices
1428            .iter()
1429            .copied()
1430            .filter(|&i| i != self.b0_index && i != self.b1_index)
1431            .collect();
1432
1433        // Get inner model Jacobian for density columns.
1434        let inner_jac = if !inner_free.is_empty() {
1435            // Evaluate inner model at current params to get T_inner for y_current.
1436            let t_inner = self.inner.evaluate(params).ok()?;
1437            self.inner
1438                .analytical_jacobian(params, &inner_free, &t_inner)
1439        } else {
1440            None
1441        };
1442
1443        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1444
1445        // Fill inner model columns (density, temperature).
1446        // Inner Jacobian is the same as bare model — background doesn't
1447        // affect ∂T_inner/∂nₖ.
1448        if let Some(ref ij) = inner_jac {
1449            let mut inner_col = 0;
1450            for (col, &fp) in free_param_indices.iter().enumerate() {
1451                if fp == self.b0_index || fp == self.b1_index {
1452                    continue;
1453                }
1454                for row in 0..n_e {
1455                    *jacobian.get_mut(row, col) = ij.get(row, inner_col);
1456                }
1457                inner_col += 1;
1458            }
1459        } else {
1460            // No analytical inner Jacobian — fall back to FD for entire model.
1461            return None;
1462        }
1463
1464        // Background columns.
1465        if let Some(col) = b0_col {
1466            for row in 0..n_e {
1467                *jacobian.get_mut(row, col) = 1.0; // ∂T_out/∂b₀ = 1
1468            }
1469        }
1470        if let Some(col) = b1_col {
1471            for row in 0..n_e {
1472                *jacobian.get_mut(row, col) = self.inv_sqrt_energies[row]; // ∂T_out/∂b₁ = 1/√E
1473            }
1474        }
1475
1476        Some(jacobian)
1477    }
1478}
1479
1480impl<'a> crate::forward_model::ForwardModel for TransmissionKLBackgroundModel<'a> {
1481    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1482        self.evaluate(params)
1483    }
1484
1485    fn n_data(&self) -> usize {
1486        self.inv_sqrt_energies.len()
1487    }
1488
1489    fn n_params(&self) -> usize {
1490        self.n_params
1491    }
1492}
1493
1494#[cfg(test)]
1495mod tests {
1496    use super::*;
1497    use crate::lm::FitModel;
1498    use crate::parameters::FitParameter;
1499
1500    /// Simple model: y = a * exp(-b * x)
1501    /// This mimics transmission: counts = flux * exp(-density * sigma)
1502    struct ExponentialModel {
1503        x: Vec<f64>,
1504        flux: Vec<f64>,
1505    }
1506
1507    impl FitModel for ExponentialModel {
1508        fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1509            let b = params[0]; // "density"
1510            Ok(self
1511                .x
1512                .iter()
1513                .zip(self.flux.iter())
1514                .map(|(&xi, &fi)| fi * (-b * xi).exp())
1515                .collect())
1516        }
1517    }
1518
1519    #[test]
1520    fn test_poisson_nll_perfect_match() {
1521        let y_obs = vec![10.0, 20.0, 30.0];
1522        let y_model = vec![10.0, 20.0, 30.0];
1523        let nll = poisson_nll(&y_obs, &y_model);
1524        // NLL = Σ(y_model - y_obs*ln(y_model))
1525        let expected: f64 = y_obs
1526            .iter()
1527            .zip(y_model.iter())
1528            .map(|(&o, &m)| m - o * m.ln())
1529            .sum();
1530        assert!((nll - expected).abs() < 1e-10);
1531    }
1532
1533    #[test]
1534    fn test_poisson_fit_exponential() {
1535        // Generate synthetic Poisson data from y = 1000 * exp(-0.5 * x)
1536        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1537        let true_b = 0.5;
1538        let flux: Vec<f64> = vec![1000.0; x.len()];
1539
1540        let model = ExponentialModel {
1541            x: x.clone(),
1542            flux: flux.clone(),
1543        };
1544
1545        // Use exact expected counts (no noise) for reproducibility
1546        let y_obs = model.evaluate(&[true_b]).unwrap();
1547
1548        let mut params = ParameterSet::new(vec![
1549            FitParameter::non_negative("b", 1.0), // Initial guess 2× off
1550        ]);
1551
1552        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1553
1554        assert!(
1555            result.converged,
1556            "Poisson fit did not converge after {} iterations",
1557            result.iterations,
1558        );
1559        assert!(
1560            (result.params[0] - true_b).abs() / true_b < 0.05,
1561            "Fitted b = {}, true = {}, error = {:.1}%",
1562            result.params[0],
1563            true_b,
1564            (result.params[0] - true_b).abs() / true_b * 100.0,
1565        );
1566    }
1567
1568    #[test]
1569    fn test_poisson_fit_low_counts() {
1570        // Low-count regime: flux = 10 counts per bin
1571        let x: Vec<f64> = (0..30).map(|i| i as f64 * 0.2).collect();
1572        let true_b = 0.3;
1573        let flux: Vec<f64> = vec![10.0; x.len()];
1574
1575        let model = ExponentialModel {
1576            x: x.clone(),
1577            flux: flux.clone(),
1578        };
1579
1580        let y_obs = model.evaluate(&[true_b]).unwrap();
1581
1582        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 0.1)]);
1583
1584        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1585
1586        assert!(result.converged);
1587        assert!(
1588            (result.params[0] - true_b).abs() / true_b < 0.1,
1589            "Low-count: fitted b = {}, true = {}",
1590            result.params[0],
1591            true_b,
1592        );
1593    }
1594
1595    #[test]
1596    fn test_poisson_non_negativity() {
1597        // Data that would drive parameter negative without constraint
1598        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
1599        let flux: Vec<f64> = vec![100.0; x.len()];
1600
1601        let model = ExponentialModel {
1602            x: x.clone(),
1603            flux: flux.clone(),
1604        };
1605
1606        // Generate data with b=0 (constant), but start with b=1
1607        let y_obs: Vec<f64> = vec![100.0; x.len()];
1608
1609        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 1.0)]);
1610
1611        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1612
1613        assert!(
1614            result.params[0] >= 0.0,
1615            "b should be non-negative, got {}",
1616            result.params[0],
1617        );
1618        assert!(
1619            result.params[0] < 0.1,
1620            "b should be ~0, got {}",
1621            result.params[0],
1622        );
1623    }
1624
1625    #[test]
1626    fn test_counts_model() {
1627        struct ConstTransmission;
1628        impl FitModel for ConstTransmission {
1629            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1630                Ok(vec![params[0]; 3])
1631            }
1632        }
1633
1634        let t_model = ConstTransmission;
1635        let flux = [100.0, 200.0, 300.0];
1636        let background = [5.0, 10.0, 15.0];
1637        let counts_model = CountsModel {
1638            transmission_model: &t_model,
1639            flux: &flux,
1640            background: &background,
1641            n_params: 1,
1642        };
1643
1644        // T = 0.5 → counts = flux*0.5 + background
1645        let result = counts_model.evaluate(&[0.5]).unwrap();
1646        assert!((result[0] - 55.0).abs() < 1e-10);
1647        assert!((result[1] - 110.0).abs() < 1e-10);
1648        assert!((result[2] - 165.0).abs() < 1e-10);
1649        assert_eq!(
1650            crate::forward_model::ForwardModel::n_params(&counts_model),
1651            1
1652        );
1653    }
1654
1655    #[test]
1656    fn test_counts_background_scale_model() {
1657        struct ConstTransmission;
1658        impl FitModel for ConstTransmission {
1659            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1660                Ok(vec![params[0]; 3])
1661            }
1662
1663            fn analytical_jacobian(
1664                &self,
1665                _params: &[f64],
1666                free_param_indices: &[usize],
1667                _y_current: &[f64],
1668            ) -> Option<FlatMatrix> {
1669                let mut jac = FlatMatrix::zeros(3, free_param_indices.len());
1670                for (col, &fp) in free_param_indices.iter().enumerate() {
1671                    if fp == 0 {
1672                        for row in 0..3 {
1673                            *jac.get_mut(row, col) = 1.0;
1674                        }
1675                    }
1676                }
1677                Some(jac)
1678            }
1679        }
1680
1681        let t_model = ConstTransmission;
1682        let flux = [100.0, 200.0, 300.0];
1683        let background = [5.0, 10.0, 15.0];
1684        let counts_model = CountsBackgroundScaleModel {
1685            transmission_model: &t_model,
1686            flux: &flux,
1687            background: &background,
1688            alpha1_index: 1,
1689            alpha2_index: 2,
1690            n_params: 3,
1691        };
1692
1693        let params = [0.5, 0.8, 1.5];
1694        let result = counts_model.evaluate(&params).unwrap();
1695        assert!((result[0] - 47.5).abs() < 1e-10);
1696        assert!((result[1] - 95.0).abs() < 1e-10);
1697        assert!((result[2] - 142.5).abs() < 1e-10);
1698        assert_eq!(
1699            crate::forward_model::ForwardModel::n_params(&counts_model),
1700            3
1701        );
1702    }
1703
1704    #[test]
1705    fn test_poisson_fit_multi_density_temperature_converges() {
1706        struct MultiDensityCountsModel {
1707            energies: Vec<f64>,
1708            flux: Vec<f64>,
1709            density_count: usize,
1710            temp_index: usize,
1711        }
1712
1713        impl MultiDensityCountsModel {
1714            fn sigma(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
1715                let center = 6.0 + iso as f64 * 4.5;
1716                let amp = 150.0 + 25.0 * iso as f64;
1717                let base_width = 0.8 + 0.12 * iso as f64;
1718                let width_coeff = 0.05 + 0.01 * iso as f64;
1719                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
1720                let delta = energy - center;
1721                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
1722                amp * gauss
1723            }
1724
1725            fn dsigma_dt(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
1726                let center = 6.0 + iso as f64 * 4.5;
1727                let amp = 150.0 + 25.0 * iso as f64;
1728                let base_width = 0.8 + 0.12 * iso as f64;
1729                let width_coeff = 0.05 + 0.01 * iso as f64;
1730                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
1731                let delta = energy - center;
1732                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
1733                let dwidth_dt = base_width * width_coeff / 300.0;
1734                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
1735            }
1736        }
1737
1738        impl FitModel for MultiDensityCountsModel {
1739            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1740                let temp_k = params[self.temp_index];
1741                let mut out = Vec::with_capacity(self.energies.len());
1742                for (i, &energy) in self.energies.iter().enumerate() {
1743                    let optical_depth = (0..self.density_count)
1744                        .map(|iso| params[iso] * self.sigma(iso, energy, temp_k))
1745                        .sum::<f64>();
1746                    out.push(self.flux[i] * (-optical_depth).exp());
1747                }
1748                Ok(out)
1749            }
1750
1751            fn analytical_jacobian(
1752                &self,
1753                params: &[f64],
1754                free_param_indices: &[usize],
1755                y_current: &[f64],
1756            ) -> Option<crate::lm::FlatMatrix> {
1757                let temp_k = params[self.temp_index];
1758                let mut jac =
1759                    crate::lm::FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
1760                for (row, &energy) in self.energies.iter().enumerate() {
1761                    let y = y_current[row];
1762                    let mut sum_n_dsigma_dt = 0.0;
1763                    for (iso, &density) in params[..self.density_count].iter().enumerate() {
1764                        sum_n_dsigma_dt += density * self.dsigma_dt(iso, energy, temp_k);
1765                    }
1766                    for (col, &fp) in free_param_indices.iter().enumerate() {
1767                        let val = if fp == self.temp_index {
1768                            -y * sum_n_dsigma_dt
1769                        } else {
1770                            -y * self.sigma(fp, energy, temp_k)
1771                        };
1772                        *jac.get_mut(row, col) = val;
1773                    }
1774                }
1775                Some(jac)
1776            }
1777        }
1778
1779        let energies: Vec<f64> = (0..220).map(|i| 1.0 + 0.18 * i as f64).collect();
1780        let flux: Vec<f64> = energies
1781            .iter()
1782            .map(|&e| 1500.0 * (1.0 + 0.15 * (e / 8.0).sin()).max(0.2))
1783            .collect();
1784        let density_count = 6usize;
1785        let temp_index = density_count;
1786        let model = MultiDensityCountsModel {
1787            energies,
1788            flux,
1789            density_count,
1790            temp_index,
1791        };
1792
1793        let true_params = vec![3.2e-4, 2.4e-4, 1.7e-4, 1.1e-4, 7.5e-5, 4.2e-5, 360.0];
1794        let y_obs = model.evaluate(&true_params).unwrap();
1795
1796        let mut params = ParameterSet::new(vec![
1797            FitParameter::non_negative("n0", 6.0e-4),
1798            FitParameter::non_negative("n1", 4.0e-4),
1799            FitParameter::non_negative("n2", 2.5e-4),
1800            FitParameter::non_negative("n3", 1.8e-4),
1801            FitParameter::non_negative("n4", 1.0e-4),
1802            FitParameter::non_negative("n5", 8.0e-5),
1803            FitParameter {
1804                name: "temperature_k".into(),
1805                value: 300.0,
1806                lower: 1.0,
1807                upper: 5000.0,
1808                fixed: false,
1809            },
1810        ]);
1811
1812        let config = PoissonConfig {
1813            max_iter: 200,
1814            gauss_newton_lambda: 1e-4,
1815            ..PoissonConfig::default()
1816        };
1817        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
1818
1819        assert!(
1820            result.converged,
1821            "multi-density+temperature Poisson fit did not converge after {} iterations",
1822            result.iterations,
1823        );
1824        assert!(
1825            result.iterations < config.max_iter,
1826            "fit hit max_iter={}, params={:?}",
1827            config.max_iter,
1828            result.params,
1829        );
1830
1831        for (i, (&fit, &truth)) in result.params[..density_count]
1832            .iter()
1833            .zip(true_params[..density_count].iter())
1834            .enumerate()
1835        {
1836            let rel_err = (fit - truth).abs() / truth;
1837            assert!(
1838                rel_err < 0.10,
1839                "density[{i}] fit={fit} truth={truth} rel_err={rel_err:.3}",
1840            );
1841        }
1842
1843        let fitted_temp = result.params[temp_index];
1844        assert!(
1845            (fitted_temp - true_params[temp_index]).abs() < 10.0,
1846            "temperature fit={fitted_temp} truth={}",
1847            true_params[temp_index],
1848        );
1849        assert!(
1850            result.iterations <= 80,
1851            "expected analytical KL path to converge well before max_iter; got {}",
1852            result.iterations,
1853        );
1854    }
1855
1856    #[test]
1857    fn test_poisson_fit_exact_optimum_without_analytical_jacobian_converges() {
1858        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1859        let true_b = 0.5;
1860        let flux: Vec<f64> = vec![1000.0; x.len()];
1861
1862        let model = ExponentialModel { x, flux };
1863        let y_obs = model.evaluate(&[true_b]).unwrap();
1864        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", true_b)]);
1865        let config = PoissonConfig {
1866            fd_step: 1e-4,
1867            tol_param: 1e-12,
1868            max_iter: 50,
1869            ..PoissonConfig::default()
1870        };
1871
1872        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
1873
1874        assert!(
1875            result.converged,
1876            "exact-optimum FD fit should converge instead of exhausting line search"
1877        );
1878        assert!(
1879            result.iterations < config.max_iter,
1880            "fit should stop by convergence, not hit max_iter"
1881        );
1882        assert!(
1883            (result.params[0] - true_b).abs() < 1e-6,
1884            "parameter drifted away from optimum: {}",
1885            result.params[0]
1886        );
1887    }
1888
1889    #[test]
1890    fn test_projected_gradient_ignores_lower_bound_blocked_direction() {
1891        let params = ParameterSet::new(vec![
1892            FitParameter::non_negative("density", 0.0),
1893            FitParameter::unbounded("temp", 300.0),
1894        ]);
1895        let free_idx = vec![0, 1];
1896        let grad = vec![0.25, -0.5];
1897
1898        let inactive = inactive_free_positions(&params, &free_idx, &grad);
1899        assert_eq!(
1900            inactive,
1901            vec![1],
1902            "lower-bound blocked density should be active"
1903        );
1904
1905        let pg_norm = projected_gradient_norm(&params, &free_idx, &grad);
1906        assert!(
1907            (pg_norm - 0.5).abs() < 1e-12,
1908            "projected gradient should ignore blocked lower-bound component"
1909        );
1910    }
1911
1912    #[test]
1913    fn test_max_feasible_step_hits_lower_bound() {
1914        let params = ParameterSet::new(vec![
1915            FitParameter::non_negative("density", 0.2),
1916            FitParameter::unbounded("temp", 300.0),
1917        ]);
1918        let alpha = max_feasible_step(&params, &[0, 1], &[0.2, 300.0], &[0.5, 0.0]);
1919        assert!(
1920            (alpha - 0.4).abs() < 1e-12,
1921            "feasible step should stop exactly at lower bound"
1922        );
1923    }
1924
1925    #[test]
1926    fn test_max_feasible_step_hits_upper_bound() {
1927        let params = ParameterSet::new(vec![FitParameter {
1928            name: "temp".into(),
1929            value: 300.0,
1930            lower: 1.0,
1931            upper: 500.0,
1932            fixed: false,
1933        }]);
1934        let alpha = max_feasible_step(&params, &[0], &[300.0], &[-50.0]);
1935        assert!(
1936            (alpha - 4.0).abs() < 1e-12,
1937            "feasible step should stop exactly at upper bound"
1938        );
1939    }
1940
1941    #[test]
1942    fn test_inactive_mask_changes_when_bound_activity_changes() {
1943        let free_idx = vec![0, 1];
1944        let params_at_bound = ParameterSet::new(vec![
1945            FitParameter::non_negative("density", 0.0),
1946            FitParameter::non_negative("temp", 1.0),
1947        ]);
1948        let params_free = ParameterSet::new(vec![
1949            FitParameter::non_negative("density", 0.2),
1950            FitParameter::non_negative("temp", 1.0),
1951        ]);
1952
1953        let mask_at_bound = inactive_free_mask(&params_at_bound, &free_idx, &[0.3, -0.2]);
1954        let mask_free = inactive_free_mask(&params_free, &free_idx, &[0.3, -0.2]);
1955
1956        assert_eq!(mask_at_bound, vec![false, true]);
1957        assert_eq!(mask_free, vec![true, true]);
1958        assert_ne!(
1959            mask_at_bound, mask_free,
1960            "active-set changes should invalidate FD quasi-Newton history"
1961        );
1962    }
1963
1964    #[test]
1965    fn test_lbfgs_history_two_loop_matches_secant_direction() {
1966        let mut history = LbfgsHistory::new(4);
1967        history.update(&[0.0], &[1.0], &[0.0], &[2.0]);
1968        let dir = history
1969            .apply_on_positions(&[4.0], &[0])
1970            .expect("history should produce a direction");
1971        assert!(
1972            (dir[0] - 2.0).abs() < 1e-12,
1973            "1D secant pair should scale gradient by inverse curvature"
1974        );
1975    }
1976
1977    #[test]
1978    fn test_lbfgs_subspace_ignores_active_components() {
1979        let mut history = LbfgsHistory::new(4);
1980        history.update(&[0.0, 0.0], &[1.0, 100.0], &[0.0, 0.0], &[2.0, 100.0]);
1981        let dir = history
1982            .apply_on_positions(&[4.0, 50.0], &[0])
1983            .expect("subspace history should produce a direction");
1984        assert!(
1985            (dir[0] - 2.0).abs() < 1e-12,
1986            "inactive-subspace L-BFGS should match 1D secant scaling on the free variable"
1987        );
1988        assert!(
1989            dir[1].abs() < 1e-12,
1990            "inactive-subspace L-BFGS should not leak blocked-variable history into the direction"
1991        );
1992    }
1993
1994    #[test]
1995    fn test_poisson_fit_converges_at_bound_active_optimum() {
1996        struct OffsetModel {
1997            base: Vec<f64>,
1998        }
1999
2000        impl FitModel for OffsetModel {
2001            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2002                Ok(self.base.iter().map(|&b| b + params[0]).collect())
2003            }
2004
2005            fn analytical_jacobian(
2006                &self,
2007                _params: &[f64],
2008                free_param_indices: &[usize],
2009                y_current: &[f64],
2010            ) -> Option<FlatMatrix> {
2011                let mut jac = FlatMatrix::zeros(y_current.len(), free_param_indices.len());
2012                for (col, &fp) in free_param_indices.iter().enumerate() {
2013                    assert_eq!(fp, 0);
2014                    for row in 0..y_current.len() {
2015                        *jac.get_mut(row, col) = 1.0;
2016                    }
2017                }
2018                Some(jac)
2019            }
2020        }
2021
2022        let model = OffsetModel {
2023            base: vec![10.0; 12],
2024        };
2025        let y_obs = vec![8.0; 12];
2026        let mut params = ParameterSet::new(vec![FitParameter::non_negative("offset", 0.0)]);
2027
2028        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
2029
2030        assert!(
2031            result.converged,
2032            "bound-active optimum should satisfy projected optimality"
2033        );
2034        assert_eq!(
2035            result.iterations, 1,
2036            "should stop on projected-gradient check"
2037        );
2038        assert!(
2039            result.params[0].abs() < 1e-12,
2040            "offset should stay pinned at lower bound, got {}",
2041            result.params[0]
2042        );
2043    }
2044
2045    #[test]
2046    fn test_poisson_fit_fd_lbfgs_handles_coupled_two_parameter_model() {
2047        struct CoupledExponentialModel {
2048            x: Vec<f64>,
2049        }
2050
2051        impl FitModel for CoupledExponentialModel {
2052            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2053                let amp = params[0];
2054                let decay = params[1];
2055                Ok(self
2056                    .x
2057                    .iter()
2058                    .map(|&x| amp * (-decay * x).exp() + 1.0)
2059                    .collect())
2060            }
2061        }
2062
2063        let model = CoupledExponentialModel {
2064            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2065        };
2066        let true_params = [120.0, 0.45];
2067        let y_obs = model.evaluate(&true_params).unwrap();
2068        let mut params = ParameterSet::new(vec![
2069            FitParameter::non_negative("amp", 30.0),
2070            FitParameter::non_negative("decay", 1.2),
2071        ]);
2072        let config = PoissonConfig {
2073            max_iter: 120,
2074            lbfgs_history: 8,
2075            ..PoissonConfig::default()
2076        };
2077        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2078        let mut baseline_params = ParameterSet::new(vec![
2079            FitParameter::non_negative("amp", 30.0),
2080            FitParameter::non_negative("decay", 1.2),
2081        ]);
2082        let baseline = poisson_fit(
2083            &model,
2084            &y_obs,
2085            &mut baseline_params,
2086            &PoissonConfig {
2087                lbfgs_history: 0,
2088                ..config.clone()
2089            },
2090        )
2091        .unwrap();
2092
2093        assert!(
2094            result.converged,
2095            "FD L-BFGS fit did not converge: {result:?}"
2096        );
2097        assert!(
2098            baseline.converged,
2099            "baseline FD fit should still converge: {baseline:?}"
2100        );
2101        assert!(
2102            result.iterations <= 60,
2103            "expected FD quasi-Newton path to converge well before max_iter; got {}",
2104            result.iterations,
2105        );
2106        assert!(
2107            result.iterations < baseline.iterations,
2108            "L-BFGS fallback should beat no-history gradient scaling: lbfgs={} baseline={}",
2109            result.iterations,
2110            baseline.iterations,
2111        );
2112        assert!(
2113            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.02,
2114            "amplitude fit={}, true={}",
2115            result.params[0],
2116            true_params[0],
2117        );
2118        assert!(
2119            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2120            "decay fit={}, true={}",
2121            result.params[1],
2122            true_params[1],
2123        );
2124    }
2125
2126    #[test]
2127    fn test_poisson_fit_fd_lbfgs_with_bound_active_offset_uses_subspace() {
2128        struct OffsetDecayModel {
2129            x: Vec<f64>,
2130        }
2131
2132        impl FitModel for OffsetDecayModel {
2133            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2134                let offset = params[0];
2135                let decay = params[1];
2136                Ok(self
2137                    .x
2138                    .iter()
2139                    .map(|&x| offset + (-decay * x).exp())
2140                    .collect())
2141            }
2142        }
2143
2144        let model = OffsetDecayModel {
2145            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2146        };
2147        let true_params = [0.0, 0.35];
2148        let y_obs = model.evaluate(&true_params).unwrap();
2149
2150        let config = PoissonConfig {
2151            max_iter: 120,
2152            lbfgs_history: 8,
2153            ..PoissonConfig::default()
2154        };
2155        let mut params = ParameterSet::new(vec![
2156            FitParameter::non_negative("offset", 0.0),
2157            FitParameter::non_negative("decay", 1.1),
2158        ]);
2159        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2160        assert!(
2161            result.converged,
2162            "subspace FD L-BFGS fit did not converge: {result:?}"
2163        );
2164        assert!(
2165            result.iterations <= 20,
2166            "bound-active subspace FD fit should converge comfortably before max_iter; got {}",
2167            result.iterations,
2168        );
2169        assert!(
2170            result.params[0].abs() < 1e-8,
2171            "offset should remain at the lower bound, got {}",
2172            result.params[0]
2173        );
2174        assert!(
2175            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2176            "decay fit={}, true={}",
2177            result.params[1],
2178            true_params[1],
2179        );
2180    }
2181
2182    #[test]
2183    fn test_poisson_fit_temperature_and_background_converges() {
2184        struct TempTransmissionModel {
2185            energies: Vec<f64>,
2186        }
2187
2188        impl TempTransmissionModel {
2189            fn sigma(&self, energy: f64, temp_k: f64) -> f64 {
2190                let center = 6.0;
2191                let amp = 110.0;
2192                let base_width = 0.55;
2193                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
2194                let delta = energy - center;
2195                amp * (-(delta * delta) / (2.0 * width * width)).exp()
2196            }
2197
2198            fn dsigma_dt(&self, energy: f64, temp_k: f64) -> f64 {
2199                let center = 6.0;
2200                let amp = 110.0;
2201                let base_width = 0.55;
2202                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
2203                let dwidth_dt = if temp_k > 0.0 {
2204                    base_width / (2.0 * (300.0 * temp_k).sqrt())
2205                } else {
2206                    0.0
2207                };
2208                let delta = energy - center;
2209                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
2210                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
2211            }
2212        }
2213
2214        impl FitModel for TempTransmissionModel {
2215            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2216                let density = params[0];
2217                let temp_k = params[1];
2218                Ok(self
2219                    .energies
2220                    .iter()
2221                    .map(|&energy| (-density * self.sigma(energy, temp_k)).exp())
2222                    .collect())
2223            }
2224
2225            fn analytical_jacobian(
2226                &self,
2227                params: &[f64],
2228                free_param_indices: &[usize],
2229                y_current: &[f64],
2230            ) -> Option<FlatMatrix> {
2231                let density = params[0];
2232                let temp_k = params[1];
2233                let mut jac = FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
2234                for (row, &energy) in self.energies.iter().enumerate() {
2235                    let y = y_current[row];
2236                    let sigma = self.sigma(energy, temp_k);
2237                    let dsigma_dt = self.dsigma_dt(energy, temp_k);
2238                    for (col, &fp) in free_param_indices.iter().enumerate() {
2239                        let deriv = match fp {
2240                            0 => -sigma * y,
2241                            1 => -density * dsigma_dt * y,
2242                            _ => unreachable!("unexpected parameter index {fp}"),
2243                        };
2244                        *jac.get_mut(row, col) = deriv;
2245                    }
2246                }
2247                Some(jac)
2248            }
2249        }
2250
2251        let energies: Vec<f64> = (0..180).map(|i| 1.0 + 0.06 * i as f64).collect();
2252        let inner = TempTransmissionModel {
2253            energies: energies.clone(),
2254        };
2255        let inv_sqrt_energies: Vec<f64> = energies.iter().map(|&e| 1.0 / e.sqrt()).collect();
2256        let wrapped = TransmissionKLBackgroundModel {
2257            inner: &inner,
2258            inv_sqrt_energies,
2259            b0_index: 2,
2260            b1_index: 3,
2261            n_params: 4,
2262        };
2263
2264        let true_params = vec![4.5e-4, 345.0, 0.012, 0.008];
2265        let y_obs = wrapped.evaluate(&true_params).unwrap();
2266
2267        let mut params = ParameterSet::new(vec![
2268            FitParameter::non_negative("density", 8.0e-4),
2269            FitParameter {
2270                name: "temperature_k".into(),
2271                value: 290.0,
2272                lower: 1.0,
2273                upper: 5000.0,
2274                fixed: false,
2275            },
2276            FitParameter {
2277                name: "kl_b0".into(),
2278                value: 0.0,
2279                lower: 0.0,
2280                upper: 0.5,
2281                fixed: false,
2282            },
2283            FitParameter {
2284                name: "kl_b1".into(),
2285                value: 0.0,
2286                lower: 0.0,
2287                upper: 0.5,
2288                fixed: false,
2289            },
2290        ]);
2291
2292        let config = PoissonConfig {
2293            max_iter: 120,
2294            gauss_newton_lambda: 1e-4,
2295            ..PoissonConfig::default()
2296        };
2297        let result = poisson_fit(&wrapped, &y_obs, &mut params, &config).unwrap();
2298
2299        assert!(result.converged, "fit did not converge: {result:?}");
2300        assert!(
2301            result.iterations <= 80,
2302            "expected convergence well before max_iter; got {}",
2303            result.iterations,
2304        );
2305        assert!(
2306            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.05,
2307            "density fit={}, true={}",
2308            result.params[0],
2309            true_params[0],
2310        );
2311        assert!(
2312            (result.params[1] - true_params[1]).abs() < 8.0,
2313            "temperature fit={}, true={}",
2314            result.params[1],
2315            true_params[1],
2316        );
2317        assert!(
2318            (result.params[2] - true_params[2]).abs() < 5e-3,
2319            "b0 fit={}, true={}",
2320            result.params[2],
2321            true_params[2],
2322        );
2323        assert!(
2324            (result.params[3] - true_params[3]).abs() < 5e-3,
2325            "b1 fit={}, true={}",
2326            result.params[3],
2327            true_params[3],
2328        );
2329    }
2330}