nereids_fitting/
poisson.rs

1//! Poisson-likelihood optimizer for low-count neutron data.
2//!
3//! When neutron counts are low (< ~30 per bin), the Gaussian/chi-squared
4//! assumption breaks down and Poisson statistics must be used directly.
5//!
6//! ## Negative log-likelihood
7//!
8//! L(θ) = Σᵢ [y_model(θ)ᵢ - y_obs,ᵢ · ln(y_model(θ)ᵢ)]
9//!
10//! This is minimized using a projected optimizer:
11//! - damped Gauss-Newton / Fisher steps when an analytical Jacobian exists
12//! - finite-difference projected gradient fallback otherwise
13//!
14//! Both paths use backtracking line search and bound constraints
15//! (non-negativity on densities).
16//!
17//! ## TRINIDI Reference
18//! - `trinidi/reconstruct.py` — Poisson NLL and APGM optimizer
19
20use nereids_core::constants::{PIVOT_FLOOR, POISSON_EPSILON};
21
22use crate::error::FittingError;
23use crate::lm::{FitModel, FlatMatrix};
24use crate::parameters::{FitParameter, ParameterSet};
25
/// Configuration for the Poisson optimizer.
///
/// All fields are public; construct via `PoissonConfig::default()` and
/// override individual fields with struct-update syntax as needed.
#[derive(Debug, Clone)]
pub struct PoissonConfig {
    /// Maximum number of iterations of the outer optimization loop.
    pub max_iter: usize,
    /// Relative step size for the finite-difference gradient
    /// (scaled by `1 + |value|` per parameter).
    pub fd_step: f64,
    /// Initial step size (alpha) for the backtracking line search.
    pub step_size: f64,
    /// Convergence tolerance used for both parameter displacement (L2 norm of step)
    /// and gradient-norm convergence checks in `poisson_fit`.
    pub tol_param: f64,
    /// Armijo line search parameter (sufficient decrease).
    pub armijo_c: f64,
    /// Line search backtracking factor (multiplies alpha on each rejected trial).
    pub backtrack: f64,
    /// Relative diagonal damping for analytical Gauss-Newton / Fisher steps.
    pub gauss_newton_lambda: f64,
    /// History size (number of stored (s, y) pairs) for the
    /// finite-difference L-BFGS fallback.
    pub lbfgs_history: usize,
    /// Whether to compute the Fisher covariance matrix (and uncertainties)
    /// after convergence.  Set to `false` for per-pixel spatial mapping
    /// when only densities are needed, avoiding extra model evaluations.
    pub compute_covariance: bool,
}
51
52impl Default for PoissonConfig {
53    fn default() -> Self {
54        Self {
55            max_iter: 200,
56            fd_step: 1e-7,
57            step_size: 1.0,
58            tol_param: 1e-8,
59            armijo_c: 1e-4,
60            backtrack: 0.5,
61            gauss_newton_lambda: 1e-3,
62            lbfgs_history: 8,
63            compute_covariance: true,
64        }
65    }
66}
67
/// Result of Poisson-likelihood optimization.
#[derive(Debug, Clone)]
pub struct PoissonResult {
    /// Final negative log-likelihood.  May be non-finite when the fit
    /// failed at the initial point (in which case `converged` is false).
    pub nll: f64,
    /// Number of iterations taken.
    pub iterations: usize,
    /// Whether the optimizer converged.
    pub converged: bool,
    /// Final parameter values (all parameters, including fixed), in the
    /// same order as the input `ParameterSet`.
    pub params: Vec<f64>,
    /// Local covariance estimate from the inverse Fisher information matrix
    /// at the converged parameters: `F⁻¹ = (J^T H J)⁻¹` where
    /// `H = diag(obs/model²)` is the Poisson Hessian.
    ///
    /// This is a local curvature estimate, NOT a Bayesian posterior.
    /// When an analytical Jacobian is available, it is used directly.
    /// Otherwise a finite-difference Jacobian is computed as fallback.
    /// `None` when the fit did not converge, the Fisher matrix is
    /// singular, or covariance computation is disabled via config.
    pub covariance: Option<FlatMatrix>,
    /// Standard errors of free parameters: `√diag(F⁻¹)`.
    /// `None` when covariance is not available.
    pub uncertainties: Option<Vec<f64>>,
}
93
94/// Compute Poisson negative log-likelihood.
95///
96/// NLL = Σᵢ [y_model - y_obs · ln(y_model)]
97///
98/// #109.2: For y_model ≤ epsilon, use a smooth C¹ quadratic extrapolation
99/// instead of a hard 1e30 penalty.  This keeps the NLL and its gradient
100/// continuous, so gradient-based optimizers (projected gradient, L-BFGS)
101/// can smoothly steer back into the feasible region rather than hitting
102/// a discontinuous cliff that stalls the line search.
103fn poisson_nll(y_obs: &[f64], y_model: &[f64]) -> f64 {
104    y_obs
105        .iter()
106        .zip(y_model.iter())
107        .map(|(&obs, &mdl)| poisson_nll_term(obs, mdl))
108        .sum()
109}
110
111/// Single-bin Poisson NLL with smooth extrapolation for mdl <= epsilon.
112///
113/// For mdl > 0: NLL = mdl - obs * ln(mdl)
114/// For mdl <= epsilon: quadratic Taylor expansion about epsilon,
115///   NLL(ε) + NLL'(ε)·(mdl−ε) + ½·NLL''(ε)·(mdl−ε)²
116/// where NLL'(x) = 1 − obs/x and NLL''(x) = obs/x².
117///
118/// Since delta = ε − mdl ≥ 0, this becomes:
119///   NLL(ε) − NLL'(ε)·delta + ½·NLL''(ε)·delta²
120///
121/// When obs == 0, the exact Hessian obs/ε² vanishes, leaving only a linear
122/// term that decreases without bound as mdl → −∞.  This can cause the
123/// optimizer to diverge.  We impose a minimum curvature of 1/ε so the
124/// quadratic penalty still curves upward for negative predictions.
125#[inline]
126fn poisson_nll_term(obs: f64, mdl: f64) -> f64 {
127    // #125.3: Negative observed counts would produce wrong-signed NLL terms.
128    // Release builds skip this check; callers must ensure non-negative counts. See #125 item 3.
129    debug_assert!(
130        obs.is_finite() && obs >= 0.0,
131        "poisson_nll_term: obs must be finite and >= 0, got {obs}"
132    );
133    if mdl > POISSON_EPSILON {
134        mdl - obs * mdl.ln()
135    } else {
136        let eps = POISSON_EPSILON;
137        let nll_eps = eps - obs * eps.ln();
138        let grad_eps = 1.0 - obs / eps;
139        // Minimum curvature 1/eps ensures the penalty grows quadratically
140        // even when obs == 0 (where the exact Hessian obs/eps^2 vanishes).
141        let hess_eps = if obs > 0.0 {
142            obs / (eps * eps)
143        } else {
144            1.0 / eps
145        };
146        let delta = eps - mdl;
147        // Taylor expansion: f(eps) + f'(eps)*(mdl - eps) + 0.5*f''(eps)*(mdl - eps)^2
148        // Since (mdl - eps) = -delta, the linear term flips sign.
149        nll_eps - grad_eps * delta + 0.5 * hess_eps * delta * delta
150    }
151}
152
153/// Per-bin Poisson NLL weight: ∂f(obs, mdl)/∂mdl.
154///
155/// For mdl > ε: w = 1 - obs/mdl
156/// For mdl ≤ ε: derivative of the smooth quadratic extrapolation,
157///   w = grad_eps - hess_eps · (ε - mdl), continuous at boundary.
158#[inline]
159fn poisson_nll_weight(obs: f64, mdl: f64) -> f64 {
160    if mdl > POISSON_EPSILON {
161        1.0 - obs / mdl
162    } else {
163        let eps = POISSON_EPSILON;
164        let grad_eps = 1.0 - obs / eps;
165        let hess_eps = if obs > 0.0 {
166            obs / (eps * eps)
167        } else {
168            1.0 / eps
169        };
170        grad_eps - hess_eps * (eps - mdl)
171    }
172}
173
174/// Per-bin Poisson NLL curvature: ∂²f(obs, mdl)/∂mdl².
175///
176/// For mdl > ε: h = obs / mdl²
177/// For mdl ≤ ε: curvature of the smooth quadratic extrapolation.
178#[inline]
179fn poisson_nll_curvature(obs: f64, mdl: f64) -> f64 {
180    if mdl > POISSON_EPSILON {
181        obs / (mdl * mdl)
182    } else {
183        let eps = POISSON_EPSILON;
184        if obs > 0.0 {
185            obs / (eps * eps)
186        } else {
187            1.0 / eps
188        }
189    }
190}
191
/// Analytical first/second-order information for the Poisson objective.
///
/// Both members are sized by the number of free parameters.
#[derive(Debug)]
struct AnalyticalStepData {
    /// Gradient of the Poisson NLL: grad = J^T · w (length n_free).
    grad: Vec<f64>,
    /// Full Gauss-Newton / Fisher curvature approximation: J^T H J
    /// (n_free × n_free).
    fisher: FlatMatrix,
}
200
201/// Compute gradient and Gauss-Newton / Fisher curvature of the Poisson NLL
202/// using the analytical Jacobian.
203///
204/// `grad_j = Σᵢ wᵢ · J_{i,j}` where `wᵢ = ∂NLL/∂y_model_i`
205/// and `J_{i,j} = ∂y_model_i/∂θⱼ` from `model.analytical_jacobian()`.
206///
207/// The curvature uses the Poisson Hessian with respect to the model output:
208/// `fisher_{j,k} = Σᵢ hᵢ · J_{i,j} · J_{i,k}` where
209/// `hᵢ = ∂²NLL/∂y_model_i²`.
210///
211/// Returns `Some(step_data)` if the model provides an analytical Jacobian,
212/// `None` otherwise (caller should fall back to finite differences).
213fn compute_analytical_step_data(
214    model: &dyn FitModel,
215    params: &ParameterSet,
216    y_obs: &[f64],
217    y_model: &[f64],
218    all_vals_buf: &mut Vec<f64>,
219    free_idx_buf: &mut Vec<usize>,
220) -> Option<AnalyticalStepData> {
221    params.all_values_into(all_vals_buf);
222    params.free_indices_into(free_idx_buf);
223    let jac = model.analytical_jacobian(all_vals_buf, free_idx_buf, y_model)?;
224    let n_e = y_obs.len();
225    let n_free = free_idx_buf.len();
226    let mut grad = vec![0.0f64; n_free];
227    let mut fisher = FlatMatrix::zeros(n_free, n_free);
228    for i in 0..n_e {
229        let w = poisson_nll_weight(y_obs[i], y_model[i]);
230        let h = poisson_nll_curvature(y_obs[i], y_model[i]);
231        for (g, j) in grad.iter_mut().zip(0..n_free) {
232            let jij = jac.get(i, j);
233            *g += w * jij;
234            for k in 0..n_free {
235                *fisher.get_mut(j, k) += h * jij * jac.get(i, k);
236            }
237        }
238    }
239    Some(AnalyticalStepData { grad, fisher })
240}
241
242/// Compute gradient of Poisson NLL by finite differences.
243///
244/// `all_vals_buf` is a reusable scratch buffer for `params.all_values_into()`,
245/// avoiding a fresh allocation on every `model.evaluate()` call inside the
246/// per-parameter FD loop (N_free+1 allocations saved per gradient call).
247///
248/// `free_idx_buf` is a scratch buffer for `params.free_indices_into()`, reused
249/// across iterations to avoid per-gradient allocation.
250fn compute_gradient(
251    model: &dyn FitModel,
252    params: &mut ParameterSet,
253    y_obs: &[f64],
254    fd_step: f64,
255    all_vals_buf: &mut Vec<f64>,
256    free_idx_buf: &mut Vec<usize>,
257) -> Result<Vec<f64>, FittingError> {
258    params.all_values_into(all_vals_buf);
259    let base_model = model.evaluate(all_vals_buf)?;
260    let base_nll = poisson_nll(y_obs, &base_model);
261
262    params.free_indices_into(free_idx_buf);
263    let mut grad = vec![0.0; free_idx_buf.len()];
264
265    for (j, &idx) in free_idx_buf.iter().enumerate() {
266        let original = params.params[idx].value;
267        let step = fd_step * (1.0 + original.abs());
268
269        params.params[idx].value = original + step;
270        params.params[idx].clamp();
271        let mut actual_step = params.params[idx].value - original;
272
273        // #112: If the forward step is blocked by an upper bound, try the
274        // backward step so the gradient component is not frozen at zero.
275        if actual_step.abs() < PIVOT_FLOOR {
276            params.params[idx].value = original - step;
277            params.params[idx].clamp();
278            actual_step = params.params[idx].value - original;
279            if actual_step.abs() < PIVOT_FLOOR {
280                // Truly stuck at a point constraint — skip this parameter.
281                params.params[idx].value = original;
282                continue;
283            }
284        }
285
286        params.all_values_into(all_vals_buf);
287        let perturbed_model = match model.evaluate(all_vals_buf) {
288            Ok(v) => v,
289            Err(_) => {
290                params.params[idx].value = original;
291                continue;
292            }
293        };
294        let perturbed_nll = poisson_nll(y_obs, &perturbed_model);
295        params.params[idx].value = original;
296
297        grad[j] = (perturbed_nll - base_nll) / actual_step;
298    }
299
300    Ok(grad)
301}
302
303fn normalized_step_norm(
304    old_free: &[f64],
305    new_free: &[f64],
306    params: &ParameterSet,
307    free_param_indices: &[usize],
308) -> f64 {
309    old_free
310        .iter()
311        .zip(new_free.iter())
312        .zip(free_param_indices.iter())
313        .map(|((&old, &new), &idx)| {
314            let range = params.params[idx].upper - params.params[idx].lower;
315            let scale = if range.is_finite() && range > 1e-10 {
316                range
317            } else {
318                old.abs().max(1e-3)
319            };
320            ((old - new) / scale).powi(2)
321        })
322        .sum::<f64>()
323        .sqrt()
324}
325
326fn is_bound_active(param: &FitParameter, grad: f64) -> bool {
327    let at_lower = param.lower.is_finite() && (param.value - param.lower).abs() <= PIVOT_FLOOR;
328    let at_upper = param.upper.is_finite() && (param.value - param.upper).abs() <= PIVOT_FLOOR;
329    (at_lower && grad > 0.0) || (at_upper && grad < 0.0)
330}
331
332fn inactive_free_positions(
333    params: &ParameterSet,
334    free_param_indices: &[usize],
335    grad: &[f64],
336) -> Vec<usize> {
337    free_param_indices
338        .iter()
339        .zip(grad.iter())
340        .enumerate()
341        .filter_map(|(pos, (&idx, &g))| (!is_bound_active(&params.params[idx], g)).then_some(pos))
342        .collect()
343}
344
345fn inactive_free_mask(
346    params: &ParameterSet,
347    free_param_indices: &[usize],
348    grad: &[f64],
349) -> Vec<bool> {
350    free_param_indices
351        .iter()
352        .zip(grad.iter())
353        .map(|(&idx, &g)| !is_bound_active(&params.params[idx], g))
354        .collect()
355}
356
357fn projected_gradient_norm(
358    params: &ParameterSet,
359    free_param_indices: &[usize],
360    grad: &[f64],
361) -> f64 {
362    free_param_indices
363        .iter()
364        .zip(grad.iter())
365        .map(|(&idx, &g)| {
366            if is_bound_active(&params.params[idx], g) {
367                0.0
368            } else {
369                g * g
370            }
371        })
372        .sum::<f64>()
373        .sqrt()
374}
375
376fn extract_submatrix(matrix: &FlatMatrix, positions: &[usize]) -> FlatMatrix {
377    let n = positions.len();
378    let mut sub = FlatMatrix::zeros(n, n);
379    for (row_out, &row_in) in positions.iter().enumerate() {
380        for (col_out, &col_in) in positions.iter().enumerate() {
381            *sub.get_mut(row_out, col_out) = matrix.get(row_in, col_in);
382        }
383    }
384    sub
385}
386
/// Bounded history of L-BFGS curvature pairs.
#[derive(Debug, Clone)]
struct LbfgsHistory {
    /// Parameter-displacement vectors s = x_new − x_old, oldest first.
    s_list: Vec<Vec<f64>>,
    /// Gradient-difference vectors y = g_new − g_old, parallel to `s_list`.
    y_list: Vec<Vec<f64>>,
    /// Maximum number of (s, y) pairs retained; the oldest pair is evicted
    /// when full.
    max_pairs: usize,
}
393
impl LbfgsHistory {
    /// Create an empty history holding at most `max_pairs` curvature pairs.
    fn new(max_pairs: usize) -> Self {
        Self {
            s_list: Vec::with_capacity(max_pairs),
            y_list: Vec::with_capacity(max_pairs),
            max_pairs,
        }
    }

    /// Discard all stored curvature pairs.
    fn clear(&mut self) {
        self.s_list.clear();
        self.y_list.clear();
    }

    /// Record the (s, y) pair from the latest accepted step.
    ///
    /// The pair is rejected when the curvature condition s·y > 0 fails
    /// (within a tolerance scaled by the vector norms), since such a pair
    /// would break positive-definiteness of the implicit Hessian model.
    /// When the history is full, the oldest pair is evicted.
    fn update(&mut self, old_free: &[f64], new_free: &[f64], old_grad: &[f64], new_grad: &[f64]) {
        if self.max_pairs == 0 {
            return;
        }
        let s: Vec<f64> = new_free
            .iter()
            .zip(old_free.iter())
            .map(|(&new, &old)| new - old)
            .collect();
        let y: Vec<f64> = new_grad
            .iter()
            .zip(old_grad.iter())
            .map(|(&new, &old)| new - old)
            .collect();
        let sy = dot(&s, &y);
        let s_norm = dot(&s, &s).sqrt();
        let y_norm = dot(&y, &y).sqrt();
        // Curvature condition: skip near-zero or negative s·y.
        if sy <= 1e-12 * s_norm * y_norm.max(1.0) {
            return;
        }
        if self.s_list.len() == self.max_pairs {
            self.s_list.remove(0);
            self.y_list.remove(0);
        }
        self.s_list.push(s);
        self.y_list.push(y);
    }

    /// Two-loop L-BFGS recursion restricted to the coordinate subset
    /// `positions` (the inactive subspace).
    ///
    /// Pairs whose restricted curvature s·y ≤ 1e-12 are skipped (tracked in
    /// `used`).  The initial Hessian scaling `gamma` comes from the most
    /// recent usable pair.  Returns the search direction scattered back
    /// into a full-length vector (zeros outside `positions`), or `None`
    /// when the history or the subset is empty.
    fn apply_on_positions(&self, grad: &[f64], positions: &[usize]) -> Option<Vec<f64>> {
        if self.s_list.is_empty() || positions.is_empty() {
            return None;
        }

        let mut q: Vec<f64> = positions.iter().map(|&pos| grad[pos]).collect();
        let mut alpha = vec![0.0; self.s_list.len()];
        let mut rho = vec![0.0; self.s_list.len()];
        let mut used = vec![false; self.s_list.len()];

        // First loop (newest to oldest): q ← q − αᵢ·yᵢ.
        for i in (0..self.s_list.len()).rev() {
            let s_sub = extract_positions(&self.s_list[i], positions);
            let y_sub = extract_positions(&self.y_list[i], positions);
            let sy = dot(&s_sub, &y_sub);
            if sy <= 1e-12 {
                continue;
            }
            rho[i] = 1.0 / sy;
            used[i] = true;
            alpha[i] = rho[i] * dot(&s_sub, &q);
            axpy(&mut q, -alpha[i], &y_sub);
        }

        // Initial scaling γ = s·y / y·y from the most recent usable pair.
        let gamma = used
            .iter()
            .enumerate()
            .rev()
            .find_map(|(i, &is_used)| {
                if !is_used {
                    return None;
                }
                let last_s = extract_positions(&self.s_list[i], positions);
                let last_y = extract_positions(&self.y_list[i], positions);
                let yy = dot(&last_y, &last_y);
                (yy > 0.0).then_some(dot(&last_s, &last_y) / yy)
            })
            .unwrap_or(1.0);

        // Second loop (oldest to newest): r ← r + (αᵢ − βᵢ)·sᵢ.
        let mut r: Vec<f64> = q.into_iter().map(|v| gamma * v).collect();
        for i in 0..self.s_list.len() {
            if !used[i] {
                continue;
            }
            let s_sub = extract_positions(&self.s_list[i], positions);
            let y_sub = extract_positions(&self.y_list[i], positions);
            let beta = rho[i] * dot(&y_sub, &r);
            axpy(&mut r, alpha[i] - beta, &s_sub);
        }

        // Scatter the subspace direction back into a full-length vector.
        let mut full = vec![0.0; grad.len()];
        for (&pos, &value) in positions.iter().zip(r.iter()) {
            full[pos] = value;
        }
        Some(full)
    }
}
492
/// Inner product of two slices (truncates to the shorter length).
fn dot(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b.iter()).fold(0.0, |acc, (&x, &y)| acc + x * y)
}
496
/// In-place scaled addition: dst[i] += alpha * src[i] (truncates to the
/// shorter length).
fn axpy(dst: &mut [f64], alpha: f64, src: &[f64]) {
    dst.iter_mut()
        .zip(src.iter())
        .for_each(|(d, &s)| *d += alpha * s);
}
502
/// Gather `values` at the given indices, preserving `positions` order.
fn extract_positions(values: &[f64], positions: &[usize]) -> Vec<f64> {
    let mut out = Vec::with_capacity(positions.len());
    for &pos in positions {
        out.push(values[pos]);
    }
    out
}
506
507fn parameter_scaled_gradient_direction(
508    params: &ParameterSet,
509    free_param_indices: &[usize],
510    grad: &[f64],
511) -> Vec<f64> {
512    grad.iter()
513        .zip(free_param_indices.iter())
514        .map(|(&g, &idx)| {
515            if is_bound_active(&params.params[idx], g) {
516                return 0.0;
517            }
518            let p = &params.params[idx];
519            let range = p.upper - p.lower;
520            if range.is_finite() && range > 1e-10 {
521                g * range * range
522            } else {
523                let scale = p.value.abs().max(1e-3);
524                g * scale * scale
525            }
526        })
527        .collect()
528}
529
530fn max_feasible_step(
531    params: &ParameterSet,
532    free_param_indices: &[usize],
533    old_free: &[f64],
534    search_dir: &[f64],
535) -> f64 {
536    let mut alpha_max = f64::INFINITY;
537    for ((&idx, &x), &d) in free_param_indices
538        .iter()
539        .zip(old_free.iter())
540        .zip(search_dir.iter())
541    {
542        if d.abs() <= PIVOT_FLOOR {
543            continue;
544        }
545        let p = &params.params[idx];
546        let candidate = if d > 0.0 && p.lower.is_finite() {
547            (x - p.lower) / d
548        } else if d < 0.0 && p.upper.is_finite() {
549            (p.upper - x) / (-d)
550        } else {
551            f64::INFINITY
552        };
553        alpha_max = alpha_max.min(candidate);
554    }
555    alpha_max.max(0.0)
556}
557
/// Outcome of one backtracking line search.
enum LineSearchResult {
    /// A step satisfying the Armijo condition was taken; `params` is left
    /// at the new point.
    Accepted {
        /// NLL at the accepted point.
        nll: f64,
        /// Model prediction at the accepted point (cached so the caller can
        /// reuse it for the next gradient computation).
        y_model: Vec<f64>,
        /// True when the accepted step size equals the feasible-step cap
        /// (i.e. the step landed exactly on a bound).
        hit_boundary: bool,
    },
    /// Progress fell below tolerance; `params` was restored to the old point.
    Stagnated,
    /// Backtracking was exhausted without sufficient decrease; `params`
    /// was restored to the old point.
    Failed,
}
567
/// Cap on consecutive boundary-hitting ("face") steps allowed within a
/// single outer iteration of `poisson_fit` — TODO confirm exact usage in
/// the face-step counter loop.
const MAX_FACE_STEPS_PER_ITER: usize = 4;
569
/// Backtracking line search with Armijo sufficient-decrease condition:
/// try a step along the feasible path, reject NaN/Inf model outputs, check
/// the Armijo condition, and backtrack (up to 50 trials) if needed.
///
/// # Arguments
/// * `model`        — Forward model (maps parameters -> predicted counts).
/// * `params`       — Parameter set (modified in place on success).
/// * `y_obs`        — Observed counts.
/// * `old_free`     — Free parameter values before the step.
/// * `free_param_indices` — Indices of the free parameters in `params`.
/// * `search_dir`   — Search direction (gradient or preconditioned gradient).
/// * `initial_alpha`— Initial step size (capped at the max feasible step).
/// * `config`       — Optimizer configuration (backtrack factor, Armijo c).
/// * `grad`         — Raw gradient (used for Armijo descent computation).
/// * `nll`          — Current negative log-likelihood.
/// * `all_vals_buf` — Scratch buffer for `params.all_values_into()`, reused
///   across the up-to-50 backtracking iterations to avoid per-trial allocation.
/// * `free_vals_buf`— Scratch buffer for `params.free_values_into()`.
/// * `trial_free_buf` — Scratch buffer for the trial free-parameter vector,
///   reused across backtracking iterations to avoid up to 50 allocations.
///
/// # Returns
/// * `LineSearchResult::Accepted { nll, y_model, hit_boundary }` — a step
///   satisfying the Armijo condition was taken; `params` holds the new
///   point, and `y_model` is returned so the caller can cache it for the
///   next analytical gradient computation.
/// * `LineSearchResult::Stagnated` — no feasible step exists, or the trial
///   step and NLL change both fell below `tol_param`.
/// * `LineSearchResult::Failed` — all backtracking attempts were exhausted.
///
/// # Failure contract
///
/// On `Stagnated` or `Failed`, `params` is restored to `old_free` before
/// returning. Callers need not restore manually.
// All 13 arguments are genuinely needed: 10 original + 3 scratch buffers that
// avoid per-backtracking-iteration allocations inside the 50-trial loop.
#[allow(clippy::too_many_arguments)]
fn backtracking_line_search(
    model: &dyn FitModel,
    params: &mut ParameterSet,
    y_obs: &[f64],
    old_free: &[f64],
    free_param_indices: &[usize],
    search_dir: &[f64],
    initial_alpha: f64,
    config: &PoissonConfig,
    grad: &[f64],
    nll: f64,
    all_vals_buf: &mut Vec<f64>,
    free_vals_buf: &mut Vec<f64>,
    trial_free_buf: &mut Vec<f64>,
) -> LineSearchResult {
    // No room to move inside the bounds: report stagnation immediately.
    let alpha_max = max_feasible_step(params, free_param_indices, old_free, search_dir);
    if alpha_max <= PIVOT_FLOOR {
        params.set_free_values(old_free);
        return LineSearchResult::Stagnated;
    }
    let mut alpha = initial_alpha.min(alpha_max);
    for _ in 0..50 {
        // Trial step along the feasible path: x_new = x - alpha * d, with
        // alpha capped so inactive-subspace directions hit bounds exactly
        // instead of relying on projection to distort the step.
        trial_free_buf.clear();
        for ((&idx, &v), &d) in free_param_indices
            .iter()
            .zip(old_free.iter())
            .zip(search_dir.iter())
        {
            let p = &params.params[idx];
            trial_free_buf.push((v - alpha * d).clamp(p.lower, p.upper));
        }
        params.set_free_values(trial_free_buf);

        params.all_values_into(all_vals_buf);
        let trial_model = match model.evaluate(all_vals_buf) {
            Ok(v) => v,
            Err(_) => {
                // Evaluation failure is treated as a bad step, not fatal.
                alpha *= config.backtrack;
                continue;
            }
        };

        // #113: If the model produced NaN/Inf, reduce step size rather
        // than accepting a garbage NLL.
        if trial_model.iter().any(|v| !v.is_finite()) {
            alpha *= config.backtrack;
            continue;
        }

        let trial_nll = poisson_nll(y_obs, &trial_model);

        // Armijo condition: f(x_new) <= f(x) - c * descent, where descent
        // uses the ACTUAL (clamped) displacement, not the nominal step.
        params.free_values_into(free_vals_buf);
        let step_norm = normalized_step_norm(old_free, free_vals_buf, params, free_param_indices);
        let descent = grad
            .iter()
            .zip(old_free.iter())
            .zip(free_vals_buf.iter())
            .map(|((&g, &old), &new)| g * (old - new))
            .sum::<f64>();

        if trial_nll.is_finite() && trial_nll <= nll - config.armijo_c * descent {
            return LineSearchResult::Accepted {
                nll: trial_nll,
                y_model: trial_model,
                // Boundary hit: the step size equals the feasible cap
                // (within relative rounding tolerance).
                hit_boundary: alpha_max.is_finite()
                    && (alpha_max - alpha).abs() <= 1e-12 * alpha_max.max(1.0),
            };
        }

        // Stagnation: both the (normalized) displacement and the relative
        // NLL change are below tolerance — further backtracking is futile.
        let nll_delta = (trial_nll - nll).abs();
        let nll_scale = trial_nll.abs().max(nll.abs()).max(1.0);
        if trial_nll.is_finite()
            && step_norm < config.tol_param
            && nll_delta <= config.tol_param * nll_scale
        {
            params.set_free_values(old_free);
            return LineSearchResult::Stagnated;
        }

        // Backtrack
        alpha *= config.backtrack;
        if alpha <= PIVOT_FLOOR {
            break;
        }
    }
    params.set_free_values(old_free);
    LineSearchResult::Failed
}
696
697/// Early return for all-fixed parameters: evaluate once and report.
698///
699/// Returns `Ok(Some(PoissonResult))` if all parameters are fixed (either a
700/// valid result with converged=true, or a non-finite NLL with converged=false).
701/// Returns `Ok(None)` if there are free parameters and optimization should
702/// proceed. Returns `Err(FittingError)` if model evaluation fails.
703fn try_early_return_fixed(
704    model: &dyn FitModel,
705    y_obs: &[f64],
706    params: &ParameterSet,
707) -> Result<Option<PoissonResult>, FittingError> {
708    if params.n_free() != 0 {
709        return Ok(None);
710    }
711    let y_model = model.evaluate(&params.all_values())?;
712    let nll = poisson_nll(y_obs, &y_model);
713    if !nll.is_finite() {
714        return Ok(Some(PoissonResult {
715            nll,
716            iterations: 0,
717            converged: false,
718            params: params.all_values(),
719            covariance: None,
720            uncertainties: None,
721        }));
722    }
723    Ok(Some(PoissonResult {
724        nll,
725        iterations: 0,
726        converged: true,
727        params: params.all_values(),
728        covariance: None,
729        uncertainties: None,
730    }))
731}
732
/// Build the Poisson Fisher information matrix via finite-difference Jacobian.
///
/// Used as fallback when the model does not provide an analytical Jacobian
/// (e.g., `TransmissionFitModel` without base_xs).  Returns `None` if any
/// model evaluation fails during the FD perturbation.
///
/// Respects parameter bounds via `clamp()` and uses the actual clamped step
/// size.  When a bound blocks the central-difference step in one direction,
/// falls back to one-sided difference.  The final matrix is
/// `F_{j,k} = Σᵢ hᵢ · J_{i,j} · J_{i,k}` with `hᵢ` from
/// `poisson_nll_curvature`.
fn compute_fd_fisher(
    model: &dyn FitModel,
    params: &mut ParameterSet,
    y_obs: &[f64],
    y_model: &[f64],
    fd_step: f64,
    all_vals_buf: &mut Vec<f64>,
    free_idx_buf: &mut Vec<usize>,
) -> Option<FlatMatrix> {
    params.free_indices_into(free_idx_buf);
    let n_free = free_idx_buf.len();
    let n_e = y_obs.len();

    let mut jac = FlatMatrix::zeros(n_e, n_free);
    for (col, &fi) in free_idx_buf.iter().enumerate() {
        let orig = params.params[fi].value;
        // Relative FD step scaled by 1 + |value|.
        let h = fd_step * (1.0 + orig.abs());

        // Forward perturbation (clamped to bounds).
        params.params[fi].value = orig + h;
        params.params[fi].clamp();
        let step_plus = params.params[fi].value - orig;

        // Only evaluate if the clamped step actually moved the parameter.
        let y_plus = if step_plus.abs() > PIVOT_FLOOR {
            params.all_values_into(all_vals_buf);
            let y = match model.evaluate(all_vals_buf) {
                Ok(v) => v,
                Err(_) => {
                    // Restore before bailing out so `params` is unchanged.
                    params.params[fi].value = orig;
                    return None;
                }
            };
            Some(y)
        } else {
            None
        };

        // Backward perturbation (clamped to bounds).
        params.params[fi].value = orig - h;
        params.params[fi].clamp();
        // Note: step_minus is ≤ 0 when the backward step succeeded.
        let step_minus = params.params[fi].value - orig;

        let y_minus = if step_minus.abs() > PIVOT_FLOOR {
            params.all_values_into(all_vals_buf);
            let y = match model.evaluate(all_vals_buf) {
                Ok(v) => v,
                Err(_) => {
                    params.params[fi].value = orig;
                    return None;
                }
            };
            Some(y)
        } else {
            None
        };

        // Restore the parameter before filling in the Jacobian column.
        params.params[fi].value = orig;

        // Central difference, or one-sided fallback at bounds.
        match (&y_plus, &y_minus) {
            (Some(yp), Some(ym)) => {
                // denom = step_plus - step_minus spans both actual steps.
                let denom = step_plus - step_minus;
                for (i, (&vp, &vm)) in yp.iter().zip(ym.iter()).enumerate() {
                    *jac.get_mut(i, col) = (vp - vm) / denom;
                }
            }
            (Some(yp), None) => {
                // Forward one-sided difference against the baseline model.
                for (i, (&vp, &v0)) in yp.iter().zip(y_model.iter()).enumerate() {
                    *jac.get_mut(i, col) = (vp - v0) / step_plus;
                }
            }
            (None, Some(ym)) => {
                // Backward one-sided difference (step_minus is negative).
                for (i, (&v0, &vm)) in y_model.iter().zip(ym.iter()).enumerate() {
                    *jac.get_mut(i, col) = (v0 - vm) / (-step_minus);
                }
            }
            (None, None) => {
                // Both directions blocked — Jacobian column stays zero.
            }
        }
    }

    // Fisher accumulation: F = Jᵀ·diag(h)·J over all data bins.
    let mut fisher = FlatMatrix::zeros(n_free, n_free);
    for i in 0..n_e {
        let h_i = poisson_nll_curvature(y_obs[i], y_model[i]);
        for j in 0..n_free {
            let jij = jac.get(i, j);
            for k in 0..n_free {
                *fisher.get_mut(j, k) += h_i * jij * jac.get(i, k);
            }
        }
    }
    Some(fisher)
}
836
/// Run Poisson-likelihood optimization using a projected KL optimizer.
///
/// Uses damped Gauss-Newton / Fisher steps when an analytical Jacobian is
/// available, falling back to projected gradient descent otherwise. Both paths
/// use backtracking line search with Armijo condition and projection onto
/// parameter bounds after each step.
///
/// # Arguments
/// * `model` — Forward model (maps parameters → predicted counts).
/// * `y_obs` — Observed counts at each data point.
/// * `params` — Parameter set (modified in place).
/// * `config` — Optimizer configuration.
///
/// # Returns
/// `Ok(PoissonResult)` with final NLL, parameters, and convergence status.
/// `Err(FittingError)` if model evaluation fails at the initial point.
/// Evaluation errors during line-search trials are treated as bad steps
/// (backtrack), not fatal errors.
pub fn poisson_fit(
    model: &dyn FitModel,
    y_obs: &[f64],
    params: &mut ParameterSet,
    config: &PoissonConfig,
) -> Result<PoissonResult, FittingError> {
    // Degenerate case (e.g., no free parameters) short-circuits here.
    if let Some(result) = try_early_return_fixed(model, y_obs, params)? {
        return Ok(result);
    }

    // Scratch buffers reused across the entire optimization loop to avoid
    // per-iteration allocations inside compute_gradient (N_free+1 calls)
    // and backtracking_line_search (up to 50 calls).
    let mut all_vals_buf = Vec::with_capacity(params.params.len());
    let mut free_vals_buf = Vec::with_capacity(params.n_free());
    let mut old_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
    let mut trial_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
    let mut free_idx_buf: Vec<usize> = Vec::with_capacity(params.n_free());
    let mut fd_history = LbfgsHistory::new(config.lbfgs_history);
    // Carry-over from the previous finite-difference iteration:
    // (free values, gradient, inactive-bound mask).  A curvature pair is
    // pushed into `fd_history` only when the mask (active set) is unchanged
    // between consecutive FD iterations.
    let mut pending_fd_state: Option<(Vec<f64>, Vec<f64>, Vec<bool>)> = None;

    params.all_values_into(&mut all_vals_buf);
    let mut y_model = model.evaluate(&all_vals_buf)?;
    let mut nll = poisson_nll(y_obs, &y_model);

    // Guard: if the initial NLL is non-finite, bail out immediately rather
    // than entering the optimization loop with garbage values.
    if !nll.is_finite() {
        return Ok(PoissonResult {
            nll,
            iterations: 0,
            converged: false,
            params: params.all_values(),
            covariance: None,
            uncertainties: None,
        });
    }

    let mut converged = false;
    let mut iter = 0;

    'outer: for _ in 0..config.max_iter {
        iter += 1;
        // Number of bound-hitting ("face") steps taken within this outer
        // iteration; capped at MAX_FACE_STEPS_PER_ITER below.
        let mut face_steps = 0usize;

        // Inner loop: repeats the step when the line search lands on a
        // parameter bound, so several active-set changes can happen within
        // a single outer iteration.
        loop {
            // Compute gradient: try analytical (grad = J^T · w) first,
            // fall back to finite differences if the model doesn't provide
            // an analytical Jacobian.
            let analytical_step = compute_analytical_step_data(
                model,
                params,
                y_obs,
                &y_model,
                &mut all_vals_buf,
                &mut free_idx_buf,
            );
            let grad = if let Some(ref analytical) = analytical_step {
                analytical.grad.clone()
            } else {
                compute_gradient(
                    model,
                    params,
                    y_obs,
                    config.fd_step,
                    &mut all_vals_buf,
                    &mut free_idx_buf,
                )?
            };

            params.free_indices_into(&mut free_idx_buf);

            // The L-BFGS history is only valid across consecutive FD
            // iterations with an unchanged active set; any analytical
            // iteration invalidates it.
            let using_fd = analytical_step.is_none();
            if using_fd {
                params.free_values_into(&mut free_vals_buf);
                let current_mask = inactive_free_mask(params, &free_idx_buf, &grad);
                if let Some((prev_free, prev_grad, prev_mask)) = pending_fd_state.take() {
                    if prev_mask == current_mask {
                        fd_history.update(&prev_free, &free_vals_buf, &prev_grad, &grad);
                    } else {
                        fd_history.clear();
                    }
                }
                pending_fd_state = Some((free_vals_buf.clone(), grad.clone(), current_mask));
            } else {
                pending_fd_state.take();
                fd_history.clear();
            }

            // Use projected-gradient optimality for bound-constrained problems.
            let projected_grad_norm = projected_gradient_norm(params, &free_idx_buf, &grad);
            if projected_grad_norm < config.tol_param {
                converged = true;
                break 'outer;
            }

            let (search_dir, initial_alpha): (Vec<f64>, f64) =
                if let Some(ref analytical) = analytical_step {
                    // Damped Gauss-Newton / Fisher step restricted to the
                    // parameters not pinned at a bound.
                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
                    if inactive_positions.is_empty() {
                        converged = true;
                        break 'outer;
                    }
                    let reduced_fisher = extract_submatrix(&analytical.fisher, &inactive_positions);
                    let reduced_grad: Vec<f64> =
                        inactive_positions.iter().map(|&pos| grad[pos]).collect();
                    let reduced_dir = crate::lm::solve_damped_system(
                        &reduced_fisher,
                        &reduced_grad,
                        config.gauss_newton_lambda,
                    )
                    .unwrap_or_else(|| {
                        // Singular system: fall back to a Jacobi-scaled
                        // gradient using the Fisher diagonal.
                        reduced_grad
                            .iter()
                            .enumerate()
                            .map(|(j, &g)| g / reduced_fisher.get(j, j).max(1e-12))
                            .collect()
                    });
                    // Scatter the reduced direction back into full free-space
                    // coordinates (blocked entries stay zero).
                    let mut dir = vec![0.0; grad.len()];
                    for (&pos, &value) in inactive_positions.iter().zip(reduced_dir.iter()) {
                        dir[pos] = value;
                    }
                    (dir, config.step_size)
                } else {
                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
                    if inactive_positions.is_empty() {
                        converged = true;
                        break 'outer;
                    }
                    let used_history = !fd_history.s_list.is_empty();
                    let mut dir = fd_history
                        .apply_on_positions(&grad, &inactive_positions)
                        .unwrap_or_else(|| {
                            parameter_scaled_gradient_direction(params, &free_idx_buf, &grad)
                        });
                    // Reject the L-BFGS direction if it is not a descent
                    // direction (non-finite or non-positive grad·dir).
                    let descent = dot(&grad, &dir);
                    if !descent.is_finite() || descent <= 0.0 {
                        dir = parameter_scaled_gradient_direction(params, &free_idx_buf, &grad);
                    }
                    if used_history && descent.is_finite() && descent > 0.0 {
                        (dir, config.step_size)
                    } else {
                        // Gradient-type direction: normalize the initial step
                        // length by the direction magnitude.
                        let search_norm: f64 = dir.iter().map(|d| d * d).sum::<f64>().sqrt();
                        (dir, config.step_size / search_norm.max(1.0))
                    }
                };

            params.free_values_into(&mut free_vals_buf);
            old_free_buf.clear();
            old_free_buf.extend_from_slice(&free_vals_buf);

            match backtracking_line_search(
                model,
                params,
                y_obs,
                &old_free_buf,
                &free_idx_buf,
                &search_dir,
                initial_alpha,
                config,
                &grad,
                nll,
                &mut all_vals_buf,
                &mut free_vals_buf,
                &mut trial_free_buf,
            ) {
                LineSearchResult::Accepted {
                    nll: new_nll,
                    y_model: new_y_model,
                    hit_boundary,
                } => {
                    if !using_fd {
                        // Defensive reset; the analytical branch above
                        // already cleared pending_fd_state via take().
                        pending_fd_state = None;
                    }
                    nll = new_nll;
                    y_model = new_y_model;

                    if hit_boundary && face_steps < MAX_FACE_STEPS_PER_ITER {
                        // Step landed on a bound — take another ("face")
                        // step within this same outer iteration.
                        face_steps += 1;
                        continue;
                    }
                }
                LineSearchResult::Stagnated => {
                    // A stagnated line search is treated as convergence at
                    // the current point.
                    converged = true;
                    break 'outer;
                }
                LineSearchResult::Failed => {
                    // No acceptable step found — stop without declaring
                    // convergence.
                    break 'outer;
                }
            }

            params.free_values_into(&mut free_vals_buf);
            let step_norm =
                normalized_step_norm(&old_free_buf, &free_vals_buf, params, &free_idx_buf);
            if step_norm < config.tol_param {
                converged = true;
                break 'outer;
            }

            // Regular accepted step (no boundary hit): return to the outer
            // iteration loop.
            break;
        }
    }

    // Compute local covariance from the inverse Fisher information matrix
    // at the converged parameters: cov = (J^T H J)^{-1}.
    // This is a local curvature estimate, not a Bayesian posterior.
    // Gated by config.compute_covariance to avoid extra evaluations when
    // the caller only needs densities (e.g., per-pixel spatial mapping).
    let (covariance, uncertainties) = if converged && config.compute_covariance {
        // Use the final y_model from the last accepted step rather than
        // re-evaluating (avoids extra model call and cannot turn a
        // successful fit into an error during post-processing).

        // Build the Fisher information matrix J^T H J from the Jacobian at
        // the converged parameters.  Try the analytical Jacobian first; fall
        // back to finite differences if not available.
        let fisher_opt = if let Some(step_data) = compute_analytical_step_data(
            model,
            params,
            y_obs,
            &y_model,
            &mut all_vals_buf,
            &mut free_idx_buf,
        ) {
            Some(step_data.fisher)
        } else {
            // Build Fisher via FD Jacobian.
            compute_fd_fisher(
                model,
                params,
                y_obs,
                &y_model,
                config.fd_step,
                &mut all_vals_buf,
                &mut free_idx_buf,
            )
        };

        if let Some(fisher) = fisher_opt {
            if let Some(cov) = crate::lm::invert_matrix(&fisher) {
                let n_free = cov.nrows;
                // Uncertainties are sqrt of the covariance diagonal;
                // non-finite or non-positive variances are reported as NaN.
                let unc: Vec<f64> = (0..n_free)
                    .map(|i| {
                        let d = cov.get(i, i);
                        if d.is_finite() && d > 0.0 {
                            d.sqrt()
                        } else {
                            f64::NAN
                        }
                    })
                    .collect();
                (Some(cov), Some(unc))
            } else {
                (None, None)
            }
        } else {
            (None, None)
        }
    } else {
        (None, None)
    };

    Ok(PoissonResult {
        nll,
        iterations: iter,
        converged,
        params: params.all_values(),
        covariance,
        uncertainties,
    })
}
1126
/// Convert transmission model + counts to a counts-based forward model.
///
/// Given:
///   Y_model = flux × T_model(θ) + background
///
/// This wraps a transmission model to predict observed counts.  The full
/// parameter vector is forwarded unchanged to the inner model; the wrapper
/// itself adds no fit parameters.
///
/// The `flux` and `background` slices must have the same length as the
/// transmission vector returned by the inner model. In debug builds,
/// `evaluate()` asserts this invariant.
pub struct CountsModel<'a> {
    /// Underlying transmission model.
    pub transmission_model: &'a dyn FitModel,
    /// Incident flux (counts per bin in open beam, after normalization).
    pub flux: &'a [f64],
    /// Background counts per bin.
    pub background: &'a [f64],
    /// Total parameter count in the wrapped model (reported through
    /// `ForwardModel::n_params`).
    pub n_params: usize,
}
1147
1148impl<'a> FitModel for CountsModel<'a> {
1149    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1150        let transmission = self.transmission_model.evaluate(params)?;
1151        debug_assert_eq!(
1152            transmission.len(),
1153            self.flux.len(),
1154            "CountsModel: transmission length ({}) != flux length ({})",
1155            transmission.len(),
1156            self.flux.len(),
1157        );
1158        debug_assert_eq!(
1159            self.flux.len(),
1160            self.background.len(),
1161            "CountsModel: flux length ({}) != background length ({})",
1162            self.flux.len(),
1163            self.background.len(),
1164        );
1165        Ok(transmission
1166            .iter()
1167            .zip(self.flux.iter())
1168            .zip(self.background.iter())
1169            .map(|((&t, &f), &b)| f * t + b)
1170            .collect())
1171    }
1172
1173    /// Analytical Jacobian: ∂Y/∂θ = flux · ∂T_inner/∂θ.
1174    ///
1175    /// Background is constant w.r.t. θ and drops out.
1176    fn analytical_jacobian(
1177        &self,
1178        params: &[f64],
1179        free_param_indices: &[usize],
1180        y_current: &[f64],
1181    ) -> Option<FlatMatrix> {
1182        let n_e = y_current.len();
1183        // Recover inner transmission: T = (Y - background) / flux
1184        let t_inner: Vec<f64> = y_current
1185            .iter()
1186            .zip(self.flux.iter())
1187            .zip(self.background.iter())
1188            .map(|((&y, &f), &b)| if f.abs() > 1e-30 { (y - b) / f } else { 0.0 })
1189            .collect();
1190        let inner_jac =
1191            self.transmission_model
1192                .analytical_jacobian(params, free_param_indices, &t_inner)?;
1193        let n_free = free_param_indices.len();
1194        let mut jac = FlatMatrix::zeros(n_e, n_free);
1195        for i in 0..n_e {
1196            for j in 0..n_free {
1197                *jac.get_mut(i, j) = self.flux[i] * inner_jac.get(i, j);
1198            }
1199        }
1200        Some(jac)
1201    }
1202}
1203
1204// ── ForwardModel implementation for CountsModel (Phase 1) ────────────────
1205
1206impl<'a> crate::forward_model::ForwardModel for CountsModel<'a> {
1207    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1208        self.evaluate(params)
1209    }
1210
1211    // No analytical jacobian — uses finite differences (same as FitModel).
1212
1213    fn n_data(&self) -> usize {
1214        self.flux.len()
1215    }
1216
1217    fn n_params(&self) -> usize {
1218        self.n_params
1219    }
1220}
1221
/// Counts model with optional nuisance scaling of signal and detector background.
///
/// Given a transmission model `T(θ)`, predicts:
///
///   Y(E) = α₁ · [Φ(E) · T(θ)] + α₂ · B(E)
///
/// where `α₁` and `α₂` are parameter-vector entries read at `alpha1_index`
/// and `alpha2_index`.  Both indices must be valid positions in the
/// parameter vector passed to `evaluate()` (direct indexing panics
/// otherwise).
pub struct CountsBackgroundScaleModel<'a> {
    /// Underlying transmission model.
    pub transmission_model: &'a dyn FitModel,
    /// Incident flux spectrum.
    pub flux: &'a [f64],
    /// Detector background spectrum.
    pub background: &'a [f64],
    /// Index of α₁ in the parameter vector.
    pub alpha1_index: usize,
    /// Index of α₂ in the parameter vector.
    pub alpha2_index: usize,
    /// Total parameter count in the wrapped model (reported through
    /// `ForwardModel::n_params`).
    pub n_params: usize,
}
1243
1244impl<'a> FitModel for CountsBackgroundScaleModel<'a> {
1245    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1246        let transmission = self.transmission_model.evaluate(params)?;
1247        let alpha1 = params[self.alpha1_index];
1248        let alpha2 = params[self.alpha2_index];
1249        debug_assert_eq!(transmission.len(), self.flux.len());
1250        debug_assert_eq!(self.flux.len(), self.background.len());
1251        Ok(transmission
1252            .iter()
1253            .zip(self.flux.iter())
1254            .zip(self.background.iter())
1255            .map(|((&t, &f), &b)| alpha1 * f * t + alpha2 * b)
1256            .collect())
1257    }
1258
1259    fn analytical_jacobian(
1260        &self,
1261        params: &[f64],
1262        free_param_indices: &[usize],
1263        y_current: &[f64],
1264    ) -> Option<FlatMatrix> {
1265        let n_e = y_current.len();
1266        let n_free = free_param_indices.len();
1267        let alpha1 = params[self.alpha1_index];
1268        let alpha1_col = free_param_indices
1269            .iter()
1270            .position(|&i| i == self.alpha1_index);
1271        let alpha2_col = free_param_indices
1272            .iter()
1273            .position(|&i| i == self.alpha2_index);
1274        let inner_free: Vec<usize> = free_param_indices
1275            .iter()
1276            .copied()
1277            .filter(|&i| i != self.alpha1_index && i != self.alpha2_index)
1278            .collect();
1279
1280        // Evaluate the inner transmission model directly instead of
1281        // reconstructing from y_current — reconstruction via
1282        // (y - alpha2*b)/(alpha1*f) is undefined when alpha1 ≈ 0.
1283        let t_inner = match self.transmission_model.evaluate(params) {
1284            Ok(t) => t,
1285            Err(_) => return None,
1286        };
1287
1288        let inner_jac = if !inner_free.is_empty() {
1289            self.transmission_model
1290                .analytical_jacobian(params, &inner_free, &t_inner)
1291        } else {
1292            None
1293        };
1294
1295        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1296        if let Some(ref ij) = inner_jac {
1297            let mut inner_col = 0;
1298            for (col, &fp) in free_param_indices.iter().enumerate() {
1299                if fp == self.alpha1_index || fp == self.alpha2_index {
1300                    continue;
1301                }
1302                for row in 0..n_e {
1303                    *jacobian.get_mut(row, col) = alpha1 * self.flux[row] * ij.get(row, inner_col);
1304                }
1305                inner_col += 1;
1306            }
1307        } else if !inner_free.is_empty() {
1308            return None;
1309        }
1310
1311        if let Some(col) = alpha1_col {
1312            for (row, (&f, &t)) in self.flux.iter().zip(t_inner.iter()).enumerate() {
1313                *jacobian.get_mut(row, col) = f * t;
1314            }
1315        }
1316        if let Some(col) = alpha2_col {
1317            for (row, &bg) in self.background.iter().enumerate() {
1318                *jacobian.get_mut(row, col) = bg;
1319            }
1320        }
1321
1322        Some(jacobian)
1323    }
1324}
1325
1326impl<'a> crate::forward_model::ForwardModel for CountsBackgroundScaleModel<'a> {
1327    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1328        self.evaluate(params)
1329    }
1330
1331    fn n_data(&self) -> usize {
1332        self.flux.len()
1333    }
1334
1335    fn n_params(&self) -> usize {
1336        self.n_params
1337    }
1338}
1339
/// KL-compatible background model for transmission data.
///
/// Given a transmission model T_inner(θ), predicts:
///
///   T_out(E) = T_inner(E) + b₀ + b₁/√E
///
/// where b₀ and b₁ are the additive background parameters at indices
/// `b0_index` and `b1_index` in the parameter vector.
///
/// Unlike `NormalizedTransmissionModel` (which uses `Anorm * T + BackA +
/// BackB/√E + BackC√E` with 4 free parameters), this model:
/// - Has only 2 background parameters (b₀, b₁), reducing overfitting risk
/// - Constrains b₀, b₁ ≥ 0 via parameter bounds (physical: background
///   adds counts, never subtracts), ensuring T_out > 0 for valid Poisson NLL
/// - Does NOT multiply T_inner by a normalization factor — normalization
///   is handled separately (nuisance estimation for counts, or pre-processing
///   for transmission data)
///
/// ## Gradient
///
/// - ∂T_out/∂nₖ = ∂T_inner/∂nₖ = -σₖ(E)·T_inner(E)  (same as bare model)
/// - ∂T_out/∂b₀ = 1
/// - ∂T_out/∂b₁ = 1/√E
pub struct TransmissionKLBackgroundModel<'a> {
    /// Underlying transmission model (density parameters only).
    pub inner: &'a dyn FitModel,
    /// Precomputed 1/√E for each energy bin.  Its length defines the number
    /// of data points reported through `ForwardModel::n_data`.
    pub inv_sqrt_energies: Vec<f64>,
    /// Index of b₀ (constant background) in the parameter vector.
    pub b0_index: usize,
    /// Index of b₁ (1/√E background) in the parameter vector.
    pub b1_index: usize,
    /// Total parameter count in the wrapped model.
    pub n_params: usize,
}
1375
1376impl<'a> FitModel for TransmissionKLBackgroundModel<'a> {
1377    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1378        let t_inner = self.inner.evaluate(params)?;
1379        let b0 = params[self.b0_index];
1380        let b1 = params[self.b1_index];
1381        Ok(t_inner
1382            .iter()
1383            .zip(self.inv_sqrt_energies.iter())
1384            .map(|(&t, &inv_sqrt_e)| t + b0 + b1 * inv_sqrt_e)
1385            .collect())
1386    }
1387
1388    fn analytical_jacobian(
1389        &self,
1390        params: &[f64],
1391        free_param_indices: &[usize],
1392        y_current: &[f64],
1393    ) -> Option<FlatMatrix> {
1394        let n_e = y_current.len();
1395        let n_free = free_param_indices.len();
1396
1397        // Identify which free params are background vs inner model.
1398        let b0_col = free_param_indices.iter().position(|&i| i == self.b0_index);
1399        let b1_col = free_param_indices.iter().position(|&i| i == self.b1_index);
1400
1401        // Inner model free params (those not b0 or b1).
1402        let inner_free: Vec<usize> = free_param_indices
1403            .iter()
1404            .copied()
1405            .filter(|&i| i != self.b0_index && i != self.b1_index)
1406            .collect();
1407
1408        // Get inner model Jacobian for density columns.
1409        let inner_jac = if !inner_free.is_empty() {
1410            // Evaluate inner model at current params to get T_inner for y_current.
1411            let t_inner = self.inner.evaluate(params).ok()?;
1412            self.inner
1413                .analytical_jacobian(params, &inner_free, &t_inner)
1414        } else {
1415            None
1416        };
1417
1418        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1419
1420        // Fill inner model columns (density, temperature).
1421        // Inner Jacobian is the same as bare model — background doesn't
1422        // affect ∂T_inner/∂nₖ.
1423        if let Some(ref ij) = inner_jac {
1424            let mut inner_col = 0;
1425            for (col, &fp) in free_param_indices.iter().enumerate() {
1426                if fp == self.b0_index || fp == self.b1_index {
1427                    continue;
1428                }
1429                for row in 0..n_e {
1430                    *jacobian.get_mut(row, col) = ij.get(row, inner_col);
1431                }
1432                inner_col += 1;
1433            }
1434        } else {
1435            // No analytical inner Jacobian — fall back to FD for entire model.
1436            return None;
1437        }
1438
1439        // Background columns.
1440        if let Some(col) = b0_col {
1441            for row in 0..n_e {
1442                *jacobian.get_mut(row, col) = 1.0; // ∂T_out/∂b₀ = 1
1443            }
1444        }
1445        if let Some(col) = b1_col {
1446            for row in 0..n_e {
1447                *jacobian.get_mut(row, col) = self.inv_sqrt_energies[row]; // ∂T_out/∂b₁ = 1/√E
1448            }
1449        }
1450
1451        Some(jacobian)
1452    }
1453}
1454
1455impl<'a> crate::forward_model::ForwardModel for TransmissionKLBackgroundModel<'a> {
1456    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1457        self.evaluate(params)
1458    }
1459
1460    fn n_data(&self) -> usize {
1461        self.inv_sqrt_energies.len()
1462    }
1463
1464    fn n_params(&self) -> usize {
1465        self.n_params
1466    }
1467}
1468
1469#[cfg(test)]
1470mod tests {
1471    use super::*;
1472    use crate::lm::FitModel;
1473    use crate::parameters::FitParameter;
1474
    /// Simple model: y = flux * exp(-b * x), with b = params[0].
    /// This mimics transmission: counts = flux * exp(-density * sigma)
    struct ExponentialModel {
        /// Abscissa values multiplied by the fit parameter in the exponent.
        x: Vec<f64>,
        /// Per-bin amplitude (open-beam counts).
        flux: Vec<f64>,
    }
1481
1482    impl FitModel for ExponentialModel {
1483        fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1484            let b = params[0]; // "density"
1485            Ok(self
1486                .x
1487                .iter()
1488                .zip(self.flux.iter())
1489                .map(|(&xi, &fi)| fi * (-b * xi).exp())
1490                .collect())
1491        }
1492    }
1493
1494    #[test]
1495    fn test_poisson_nll_perfect_match() {
1496        let y_obs = vec![10.0, 20.0, 30.0];
1497        let y_model = vec![10.0, 20.0, 30.0];
1498        let nll = poisson_nll(&y_obs, &y_model);
1499        // NLL = Σ(y_model - y_obs*ln(y_model))
1500        let expected: f64 = y_obs
1501            .iter()
1502            .zip(y_model.iter())
1503            .map(|(&o, &m)| m - o * m.ln())
1504            .sum();
1505        assert!((nll - expected).abs() < 1e-10);
1506    }
1507
1508    #[test]
1509    fn test_poisson_fit_exponential() {
1510        // Generate synthetic Poisson data from y = 1000 * exp(-0.5 * x)
1511        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1512        let true_b = 0.5;
1513        let flux: Vec<f64> = vec![1000.0; x.len()];
1514
1515        let model = ExponentialModel {
1516            x: x.clone(),
1517            flux: flux.clone(),
1518        };
1519
1520        // Use exact expected counts (no noise) for reproducibility
1521        let y_obs = model.evaluate(&[true_b]).unwrap();
1522
1523        let mut params = ParameterSet::new(vec![
1524            FitParameter::non_negative("b", 1.0), // Initial guess 2× off
1525        ]);
1526
1527        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1528
1529        assert!(
1530            result.converged,
1531            "Poisson fit did not converge after {} iterations",
1532            result.iterations,
1533        );
1534        assert!(
1535            (result.params[0] - true_b).abs() / true_b < 0.05,
1536            "Fitted b = {}, true = {}, error = {:.1}%",
1537            result.params[0],
1538            true_b,
1539            (result.params[0] - true_b).abs() / true_b * 100.0,
1540        );
1541    }
1542
1543    #[test]
1544    fn test_poisson_fit_low_counts() {
1545        // Low-count regime: flux = 10 counts per bin
1546        let x: Vec<f64> = (0..30).map(|i| i as f64 * 0.2).collect();
1547        let true_b = 0.3;
1548        let flux: Vec<f64> = vec![10.0; x.len()];
1549
1550        let model = ExponentialModel {
1551            x: x.clone(),
1552            flux: flux.clone(),
1553        };
1554
1555        let y_obs = model.evaluate(&[true_b]).unwrap();
1556
1557        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 0.1)]);
1558
1559        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1560
1561        assert!(result.converged);
1562        assert!(
1563            (result.params[0] - true_b).abs() / true_b < 0.1,
1564            "Low-count: fitted b = {}, true = {}",
1565            result.params[0],
1566            true_b,
1567        );
1568    }
1569
1570    #[test]
1571    fn test_poisson_non_negativity() {
1572        // Data that would drive parameter negative without constraint
1573        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
1574        let flux: Vec<f64> = vec![100.0; x.len()];
1575
1576        let model = ExponentialModel {
1577            x: x.clone(),
1578            flux: flux.clone(),
1579        };
1580
1581        // Generate data with b=0 (constant), but start with b=1
1582        let y_obs: Vec<f64> = vec![100.0; x.len()];
1583
1584        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 1.0)]);
1585
1586        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1587
1588        assert!(
1589            result.params[0] >= 0.0,
1590            "b should be non-negative, got {}",
1591            result.params[0],
1592        );
1593        assert!(
1594            result.params[0] < 0.1,
1595            "b should be ~0, got {}",
1596            result.params[0],
1597        );
1598    }
1599
1600    #[test]
1601    fn test_counts_model() {
1602        struct ConstTransmission;
1603        impl FitModel for ConstTransmission {
1604            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1605                Ok(vec![params[0]; 3])
1606            }
1607        }
1608
1609        let t_model = ConstTransmission;
1610        let flux = [100.0, 200.0, 300.0];
1611        let background = [5.0, 10.0, 15.0];
1612        let counts_model = CountsModel {
1613            transmission_model: &t_model,
1614            flux: &flux,
1615            background: &background,
1616            n_params: 1,
1617        };
1618
1619        // T = 0.5 → counts = flux*0.5 + background
1620        let result = counts_model.evaluate(&[0.5]).unwrap();
1621        assert!((result[0] - 55.0).abs() < 1e-10);
1622        assert!((result[1] - 110.0).abs() < 1e-10);
1623        assert!((result[2] - 165.0).abs() < 1e-10);
1624        assert_eq!(
1625            crate::forward_model::ForwardModel::n_params(&counts_model),
1626            1
1627        );
1628    }
1629
1630    #[test]
1631    fn test_counts_background_scale_model() {
1632        struct ConstTransmission;
1633        impl FitModel for ConstTransmission {
1634            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1635                Ok(vec![params[0]; 3])
1636            }
1637
1638            fn analytical_jacobian(
1639                &self,
1640                _params: &[f64],
1641                free_param_indices: &[usize],
1642                _y_current: &[f64],
1643            ) -> Option<FlatMatrix> {
1644                let mut jac = FlatMatrix::zeros(3, free_param_indices.len());
1645                for (col, &fp) in free_param_indices.iter().enumerate() {
1646                    if fp == 0 {
1647                        for row in 0..3 {
1648                            *jac.get_mut(row, col) = 1.0;
1649                        }
1650                    }
1651                }
1652                Some(jac)
1653            }
1654        }
1655
1656        let t_model = ConstTransmission;
1657        let flux = [100.0, 200.0, 300.0];
1658        let background = [5.0, 10.0, 15.0];
1659        let counts_model = CountsBackgroundScaleModel {
1660            transmission_model: &t_model,
1661            flux: &flux,
1662            background: &background,
1663            alpha1_index: 1,
1664            alpha2_index: 2,
1665            n_params: 3,
1666        };
1667
1668        let params = [0.5, 0.8, 1.5];
1669        let result = counts_model.evaluate(&params).unwrap();
1670        assert!((result[0] - 47.5).abs() < 1e-10);
1671        assert!((result[1] - 95.0).abs() < 1e-10);
1672        assert!((result[2] - 142.5).abs() < 1e-10);
1673        assert_eq!(
1674            crate::forward_model::ForwardModel::n_params(&counts_model),
1675            3
1676        );
1677    }
1678
    #[test]
    fn test_poisson_fit_multi_density_temperature_converges() {
        // Joint fit of six isotope densities plus one shared temperature
        // through the analytical-Jacobian (Gauss-Newton / Fisher) path of
        // `poisson_fit`, recovering densities to <10% and temperature to
        // within 10 K well before `max_iter`.
        struct MultiDensityCountsModel {
            // Energy grid shared by all bins.
            energies: Vec<f64>,
            // Incident flux per bin; model counts = flux * exp(-optical depth).
            flux: Vec<f64>,
            // Parameters [0, density_count) are isotope densities.
            density_count: usize,
            // Index of the shared temperature parameter (== density_count here).
            temp_index: usize,
        }

        impl MultiDensityCountsModel {
            // Synthetic Gaussian pseudo-resonance for isotope `iso`: center,
            // amplitude, base width, and temperature sensitivity all vary
            // with the isotope index so the densities are distinguishable.
            // The width broadens linearly with (temp_k - 300), floored at 0.1.
            fn sigma(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
                let center = 6.0 + iso as f64 * 4.5;
                let amp = 150.0 + 25.0 * iso as f64;
                let base_width = 0.8 + 0.12 * iso as f64;
                let width_coeff = 0.05 + 0.01 * iso as f64;
                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
                let delta = energy - center;
                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
                amp * gauss
            }

            // Closed-form ∂σ/∂T of `sigma` via the chain rule through the
            // width. NOTE(review): this ignores the `.max(0.1)` clamp (true
            // derivative is 0 when clamped) — harmless here since the clamp
            // is never active for the temperatures this test explores.
            fn dsigma_dt(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
                let center = 6.0 + iso as f64 * 4.5;
                let amp = 150.0 + 25.0 * iso as f64;
                let base_width = 0.8 + 0.12 * iso as f64;
                let width_coeff = 0.05 + 0.01 * iso as f64;
                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
                let delta = energy - center;
                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
                let dwidth_dt = base_width * width_coeff / 300.0;
                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
            }
        }

        impl FitModel for MultiDensityCountsModel {
            // counts_i = flux_i * exp(-Σ_iso n_iso * σ_iso(E_i, T)).
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let temp_k = params[self.temp_index];
                let mut out = Vec::with_capacity(self.energies.len());
                for (i, &energy) in self.energies.iter().enumerate() {
                    let optical_depth = (0..self.density_count)
                        .map(|iso| params[iso] * self.sigma(iso, energy, temp_k))
                        .sum::<f64>();
                    out.push(self.flux[i] * (-optical_depth).exp());
                }
                Ok(out)
            }

            // Exact Jacobian: ∂y/∂n_iso = -y * σ_iso, and
            // ∂y/∂T = -y * Σ_iso n_iso * ∂σ_iso/∂T (reuses y_current so the
            // model is not re-evaluated).
            fn analytical_jacobian(
                &self,
                params: &[f64],
                free_param_indices: &[usize],
                y_current: &[f64],
            ) -> Option<crate::lm::FlatMatrix> {
                let temp_k = params[self.temp_index];
                let mut jac =
                    crate::lm::FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
                for (row, &energy) in self.energies.iter().enumerate() {
                    let y = y_current[row];
                    let mut sum_n_dsigma_dt = 0.0;
                    for (iso, &density) in params[..self.density_count].iter().enumerate() {
                        sum_n_dsigma_dt += density * self.dsigma_dt(iso, energy, temp_k);
                    }
                    for (col, &fp) in free_param_indices.iter().enumerate() {
                        let val = if fp == self.temp_index {
                            -y * sum_n_dsigma_dt
                        } else {
                            // Any non-temperature free index is a density index.
                            -y * self.sigma(fp, energy, temp_k)
                        };
                        *jac.get_mut(row, col) = val;
                    }
                }
                Some(jac)
            }
        }

        // 220 bins spanning ~1–40.4 "energy units"; flux has a mild
        // sinusoidal modulation so bins are not all equally weighted.
        let energies: Vec<f64> = (0..220).map(|i| 1.0 + 0.18 * i as f64).collect();
        let flux: Vec<f64> = energies
            .iter()
            .map(|&e| 1500.0 * (1.0 + 0.15 * (e / 8.0).sin()).max(0.2))
            .collect();
        let density_count = 6usize;
        let temp_index = density_count;
        let model = MultiDensityCountsModel {
            energies,
            flux,
            density_count,
            temp_index,
        };

        // Noise-free synthetic observations from the ground truth.
        let true_params = vec![3.2e-4, 2.4e-4, 1.7e-4, 1.1e-4, 7.5e-5, 4.2e-5, 360.0];
        let y_obs = model.evaluate(&true_params).unwrap();

        // Starting point is deliberately off (densities up to ~2x truth,
        // temperature 60 K low) to exercise real optimization progress.
        let mut params = ParameterSet::new(vec![
            FitParameter::non_negative("n0", 6.0e-4),
            FitParameter::non_negative("n1", 4.0e-4),
            FitParameter::non_negative("n2", 2.5e-4),
            FitParameter::non_negative("n3", 1.8e-4),
            FitParameter::non_negative("n4", 1.0e-4),
            FitParameter::non_negative("n5", 8.0e-5),
            FitParameter {
                name: "temperature_k".into(),
                value: 300.0,
                lower: 1.0,
                upper: 5000.0,
                fixed: false,
            },
        ]);

        let config = PoissonConfig {
            max_iter: 200,
            gauss_newton_lambda: 1e-4,
            ..PoissonConfig::default()
        };
        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();

        assert!(
            result.converged,
            "multi-density+temperature Poisson fit did not converge after {} iterations",
            result.iterations,
        );
        assert!(
            result.iterations < config.max_iter,
            "fit hit max_iter={}, params={:?}",
            config.max_iter,
            result.params,
        );

        // Each density individually within 10% of truth.
        for (i, (&fit, &truth)) in result.params[..density_count]
            .iter()
            .zip(true_params[..density_count].iter())
            .enumerate()
        {
            let rel_err = (fit - truth).abs() / truth;
            assert!(
                rel_err < 0.10,
                "density[{i}] fit={fit} truth={truth} rel_err={rel_err:.3}",
            );
        }

        let fitted_temp = result.params[temp_index];
        assert!(
            (fitted_temp - true_params[temp_index]).abs() < 10.0,
            "temperature fit={fitted_temp} truth={}",
            true_params[temp_index],
        );
        // Guard the efficiency of the analytical path, not just correctness.
        assert!(
            result.iterations <= 80,
            "expected analytical KL path to converge well before max_iter; got {}",
            result.iterations,
        );
    }
1830
1831    #[test]
1832    fn test_poisson_fit_exact_optimum_without_analytical_jacobian_converges() {
1833        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1834        let true_b = 0.5;
1835        let flux: Vec<f64> = vec![1000.0; x.len()];
1836
1837        let model = ExponentialModel { x, flux };
1838        let y_obs = model.evaluate(&[true_b]).unwrap();
1839        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", true_b)]);
1840        let config = PoissonConfig {
1841            fd_step: 1e-4,
1842            tol_param: 1e-12,
1843            max_iter: 50,
1844            ..PoissonConfig::default()
1845        };
1846
1847        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
1848
1849        assert!(
1850            result.converged,
1851            "exact-optimum FD fit should converge instead of exhausting line search"
1852        );
1853        assert!(
1854            result.iterations < config.max_iter,
1855            "fit should stop by convergence, not hit max_iter"
1856        );
1857        assert!(
1858            (result.params[0] - true_b).abs() < 1e-6,
1859            "parameter drifted away from optimum: {}",
1860            result.params[0]
1861        );
1862    }
1863
1864    #[test]
1865    fn test_projected_gradient_ignores_lower_bound_blocked_direction() {
1866        let params = ParameterSet::new(vec![
1867            FitParameter::non_negative("density", 0.0),
1868            FitParameter::unbounded("temp", 300.0),
1869        ]);
1870        let free_idx = vec![0, 1];
1871        let grad = vec![0.25, -0.5];
1872
1873        let inactive = inactive_free_positions(&params, &free_idx, &grad);
1874        assert_eq!(
1875            inactive,
1876            vec![1],
1877            "lower-bound blocked density should be active"
1878        );
1879
1880        let pg_norm = projected_gradient_norm(&params, &free_idx, &grad);
1881        assert!(
1882            (pg_norm - 0.5).abs() < 1e-12,
1883            "projected gradient should ignore blocked lower-bound component"
1884        );
1885    }
1886
1887    #[test]
1888    fn test_max_feasible_step_hits_lower_bound() {
1889        let params = ParameterSet::new(vec![
1890            FitParameter::non_negative("density", 0.2),
1891            FitParameter::unbounded("temp", 300.0),
1892        ]);
1893        let alpha = max_feasible_step(&params, &[0, 1], &[0.2, 300.0], &[0.5, 0.0]);
1894        assert!(
1895            (alpha - 0.4).abs() < 1e-12,
1896            "feasible step should stop exactly at lower bound"
1897        );
1898    }
1899
1900    #[test]
1901    fn test_max_feasible_step_hits_upper_bound() {
1902        let params = ParameterSet::new(vec![FitParameter {
1903            name: "temp".into(),
1904            value: 300.0,
1905            lower: 1.0,
1906            upper: 500.0,
1907            fixed: false,
1908        }]);
1909        let alpha = max_feasible_step(&params, &[0], &[300.0], &[-50.0]);
1910        assert!(
1911            (alpha - 4.0).abs() < 1e-12,
1912            "feasible step should stop exactly at upper bound"
1913        );
1914    }
1915
1916    #[test]
1917    fn test_inactive_mask_changes_when_bound_activity_changes() {
1918        let free_idx = vec![0, 1];
1919        let params_at_bound = ParameterSet::new(vec![
1920            FitParameter::non_negative("density", 0.0),
1921            FitParameter::non_negative("temp", 1.0),
1922        ]);
1923        let params_free = ParameterSet::new(vec![
1924            FitParameter::non_negative("density", 0.2),
1925            FitParameter::non_negative("temp", 1.0),
1926        ]);
1927
1928        let mask_at_bound = inactive_free_mask(&params_at_bound, &free_idx, &[0.3, -0.2]);
1929        let mask_free = inactive_free_mask(&params_free, &free_idx, &[0.3, -0.2]);
1930
1931        assert_eq!(mask_at_bound, vec![false, true]);
1932        assert_eq!(mask_free, vec![true, true]);
1933        assert_ne!(
1934            mask_at_bound, mask_free,
1935            "active-set changes should invalidate FD quasi-Newton history"
1936        );
1937    }
1938
1939    #[test]
1940    fn test_lbfgs_history_two_loop_matches_secant_direction() {
1941        let mut history = LbfgsHistory::new(4);
1942        history.update(&[0.0], &[1.0], &[0.0], &[2.0]);
1943        let dir = history
1944            .apply_on_positions(&[4.0], &[0])
1945            .expect("history should produce a direction");
1946        assert!(
1947            (dir[0] - 2.0).abs() < 1e-12,
1948            "1D secant pair should scale gradient by inverse curvature"
1949        );
1950    }
1951
1952    #[test]
1953    fn test_lbfgs_subspace_ignores_active_components() {
1954        let mut history = LbfgsHistory::new(4);
1955        history.update(&[0.0, 0.0], &[1.0, 100.0], &[0.0, 0.0], &[2.0, 100.0]);
1956        let dir = history
1957            .apply_on_positions(&[4.0, 50.0], &[0])
1958            .expect("subspace history should produce a direction");
1959        assert!(
1960            (dir[0] - 2.0).abs() < 1e-12,
1961            "inactive-subspace L-BFGS should match 1D secant scaling on the free variable"
1962        );
1963        assert!(
1964            dir[1].abs() < 1e-12,
1965            "inactive-subspace L-BFGS should not leak blocked-variable history into the direction"
1966        );
1967    }
1968
1969    #[test]
1970    fn test_poisson_fit_converges_at_bound_active_optimum() {
1971        struct OffsetModel {
1972            base: Vec<f64>,
1973        }
1974
1975        impl FitModel for OffsetModel {
1976            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1977                Ok(self.base.iter().map(|&b| b + params[0]).collect())
1978            }
1979
1980            fn analytical_jacobian(
1981                &self,
1982                _params: &[f64],
1983                free_param_indices: &[usize],
1984                y_current: &[f64],
1985            ) -> Option<FlatMatrix> {
1986                let mut jac = FlatMatrix::zeros(y_current.len(), free_param_indices.len());
1987                for (col, &fp) in free_param_indices.iter().enumerate() {
1988                    assert_eq!(fp, 0);
1989                    for row in 0..y_current.len() {
1990                        *jac.get_mut(row, col) = 1.0;
1991                    }
1992                }
1993                Some(jac)
1994            }
1995        }
1996
1997        let model = OffsetModel {
1998            base: vec![10.0; 12],
1999        };
2000        let y_obs = vec![8.0; 12];
2001        let mut params = ParameterSet::new(vec![FitParameter::non_negative("offset", 0.0)]);
2002
2003        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
2004
2005        assert!(
2006            result.converged,
2007            "bound-active optimum should satisfy projected optimality"
2008        );
2009        assert_eq!(
2010            result.iterations, 1,
2011            "should stop on projected-gradient check"
2012        );
2013        assert!(
2014            result.params[0].abs() < 1e-12,
2015            "offset should stay pinned at lower bound, got {}",
2016            result.params[0]
2017        );
2018    }
2019
2020    #[test]
2021    fn test_poisson_fit_fd_lbfgs_handles_coupled_two_parameter_model() {
2022        struct CoupledExponentialModel {
2023            x: Vec<f64>,
2024        }
2025
2026        impl FitModel for CoupledExponentialModel {
2027            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2028                let amp = params[0];
2029                let decay = params[1];
2030                Ok(self
2031                    .x
2032                    .iter()
2033                    .map(|&x| amp * (-decay * x).exp() + 1.0)
2034                    .collect())
2035            }
2036        }
2037
2038        let model = CoupledExponentialModel {
2039            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2040        };
2041        let true_params = [120.0, 0.45];
2042        let y_obs = model.evaluate(&true_params).unwrap();
2043        let mut params = ParameterSet::new(vec![
2044            FitParameter::non_negative("amp", 30.0),
2045            FitParameter::non_negative("decay", 1.2),
2046        ]);
2047        let config = PoissonConfig {
2048            max_iter: 120,
2049            lbfgs_history: 8,
2050            ..PoissonConfig::default()
2051        };
2052        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2053        let mut baseline_params = ParameterSet::new(vec![
2054            FitParameter::non_negative("amp", 30.0),
2055            FitParameter::non_negative("decay", 1.2),
2056        ]);
2057        let baseline = poisson_fit(
2058            &model,
2059            &y_obs,
2060            &mut baseline_params,
2061            &PoissonConfig {
2062                lbfgs_history: 0,
2063                ..config.clone()
2064            },
2065        )
2066        .unwrap();
2067
2068        assert!(
2069            result.converged,
2070            "FD L-BFGS fit did not converge: {result:?}"
2071        );
2072        assert!(
2073            baseline.converged,
2074            "baseline FD fit should still converge: {baseline:?}"
2075        );
2076        assert!(
2077            result.iterations <= 60,
2078            "expected FD quasi-Newton path to converge well before max_iter; got {}",
2079            result.iterations,
2080        );
2081        assert!(
2082            result.iterations < baseline.iterations,
2083            "L-BFGS fallback should beat no-history gradient scaling: lbfgs={} baseline={}",
2084            result.iterations,
2085            baseline.iterations,
2086        );
2087        assert!(
2088            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.02,
2089            "amplitude fit={}, true={}",
2090            result.params[0],
2091            true_params[0],
2092        );
2093        assert!(
2094            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2095            "decay fit={}, true={}",
2096            result.params[1],
2097            true_params[1],
2098        );
2099    }
2100
2101    #[test]
2102    fn test_poisson_fit_fd_lbfgs_with_bound_active_offset_uses_subspace() {
2103        struct OffsetDecayModel {
2104            x: Vec<f64>,
2105        }
2106
2107        impl FitModel for OffsetDecayModel {
2108            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2109                let offset = params[0];
2110                let decay = params[1];
2111                Ok(self
2112                    .x
2113                    .iter()
2114                    .map(|&x| offset + (-decay * x).exp())
2115                    .collect())
2116            }
2117        }
2118
2119        let model = OffsetDecayModel {
2120            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2121        };
2122        let true_params = [0.0, 0.35];
2123        let y_obs = model.evaluate(&true_params).unwrap();
2124
2125        let config = PoissonConfig {
2126            max_iter: 120,
2127            lbfgs_history: 8,
2128            ..PoissonConfig::default()
2129        };
2130        let mut params = ParameterSet::new(vec![
2131            FitParameter::non_negative("offset", 0.0),
2132            FitParameter::non_negative("decay", 1.1),
2133        ]);
2134        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2135        assert!(
2136            result.converged,
2137            "subspace FD L-BFGS fit did not converge: {result:?}"
2138        );
2139        assert!(
2140            result.iterations <= 20,
2141            "bound-active subspace FD fit should converge comfortably before max_iter; got {}",
2142            result.iterations,
2143        );
2144        assert!(
2145            result.params[0].abs() < 1e-8,
2146            "offset should remain at the lower bound, got {}",
2147            result.params[0]
2148        );
2149        assert!(
2150            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2151            "decay fit={}, true={}",
2152            result.params[1],
2153            true_params[1],
2154        );
2155    }
2156
    #[test]
    fn test_poisson_fit_temperature_and_background_converges() {
        // End-to-end fit of density + temperature + two KL background
        // coefficients: a single-resonance transmission model wrapped in
        // `TransmissionKLBackgroundModel`, solved through the analytical
        // Gauss-Newton / Fisher path.
        struct TempTransmissionModel {
            energies: Vec<f64>,
        }

        impl TempTransmissionModel {
            // Single Gaussian pseudo-resonance at energy 6.0 whose width
            // scales as sqrt(T/300) (Doppler-like broadening), floored
            // at 0.08.
            fn sigma(&self, energy: f64, temp_k: f64) -> f64 {
                let center = 6.0;
                let amp = 110.0;
                let base_width = 0.55;
                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
                let delta = energy - center;
                amp * (-(delta * delta) / (2.0 * width * width)).exp()
            }

            // Closed-form ∂σ/∂T of `sigma` via d(width)/dT; the temp_k <= 0
            // guard zeroes the derivative where sqrt would be undefined.
            // NOTE(review): the `.max(0.08)` width clamp is not reflected in
            // the derivative — inactive for the temperatures tested here.
            fn dsigma_dt(&self, energy: f64, temp_k: f64) -> f64 {
                let center = 6.0;
                let amp = 110.0;
                let base_width = 0.55;
                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
                let dwidth_dt = if temp_k > 0.0 {
                    base_width / (2.0 * (300.0 * temp_k).sqrt())
                } else {
                    0.0
                };
                let delta = energy - center;
                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
            }
        }

        impl FitModel for TempTransmissionModel {
            // Pure transmission: T_i = exp(-n * σ(E_i, T)); flux/background
            // handling is delegated to the wrapping KL background model.
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let density = params[0];
                let temp_k = params[1];
                Ok(self
                    .energies
                    .iter()
                    .map(|&energy| (-density * self.sigma(energy, temp_k)).exp())
                    .collect())
            }

            // Exact Jacobian of the inner transmission:
            // ∂y/∂n = -σ * y and ∂y/∂T = -n * ∂σ/∂T * y.
            fn analytical_jacobian(
                &self,
                params: &[f64],
                free_param_indices: &[usize],
                y_current: &[f64],
            ) -> Option<FlatMatrix> {
                let density = params[0];
                let temp_k = params[1];
                let mut jac = FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
                for (row, &energy) in self.energies.iter().enumerate() {
                    let y = y_current[row];
                    let sigma = self.sigma(energy, temp_k);
                    let dsigma_dt = self.dsigma_dt(energy, temp_k);
                    for (col, &fp) in free_param_indices.iter().enumerate() {
                        let deriv = match fp {
                            0 => -sigma * y,
                            1 => -density * dsigma_dt * y,
                            // The wrapper owns indices 2 and 3 (background),
                            // so the inner model must never see them.
                            _ => unreachable!("unexpected parameter index {fp}"),
                        };
                        *jac.get_mut(row, col) = deriv;
                    }
                }
                Some(jac)
            }
        }

        // 180 bins spanning ~1–11.7; the 1/sqrt(E) shape feeds the KL
        // background expansion of the wrapper.
        let energies: Vec<f64> = (0..180).map(|i| 1.0 + 0.06 * i as f64).collect();
        let inner = TempTransmissionModel {
            energies: energies.clone(),
        };
        let inv_sqrt_energies: Vec<f64> = energies.iter().map(|&e| 1.0 / e.sqrt()).collect();
        let wrapped = TransmissionKLBackgroundModel {
            inner: &inner,
            inv_sqrt_energies,
            b0_index: 2,
            b1_index: 3,
            n_params: 4,
        };

        // Noise-free observations from [density, temp_k, b0, b1] truth.
        let true_params = vec![4.5e-4, 345.0, 0.012, 0.008];
        let y_obs = wrapped.evaluate(&true_params).unwrap();

        // Start well off: density ~2x high, temperature 55 K low,
        // backgrounds at zero (their lower bound).
        let mut params = ParameterSet::new(vec![
            FitParameter::non_negative("density", 8.0e-4),
            FitParameter {
                name: "temperature_k".into(),
                value: 290.0,
                lower: 1.0,
                upper: 5000.0,
                fixed: false,
            },
            FitParameter {
                name: "kl_b0".into(),
                value: 0.0,
                lower: 0.0,
                upper: 0.5,
                fixed: false,
            },
            FitParameter {
                name: "kl_b1".into(),
                value: 0.0,
                lower: 0.0,
                upper: 0.5,
                fixed: false,
            },
        ]);

        let config = PoissonConfig {
            max_iter: 120,
            gauss_newton_lambda: 1e-4,
            ..PoissonConfig::default()
        };
        let result = poisson_fit(&wrapped, &y_obs, &mut params, &config).unwrap();

        assert!(result.converged, "fit did not converge: {result:?}");
        assert!(
            result.iterations <= 80,
            "expected convergence well before max_iter; got {}",
            result.iterations,
        );
        // Per-parameter recovery tolerances: 5% relative on density,
        // absolute windows on temperature and background coefficients.
        assert!(
            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.05,
            "density fit={}, true={}",
            result.params[0],
            true_params[0],
        );
        assert!(
            (result.params[1] - true_params[1]).abs() < 8.0,
            "temperature fit={}, true={}",
            result.params[1],
            true_params[1],
        );
        assert!(
            (result.params[2] - true_params[2]).abs() < 5e-3,
            "b0 fit={}, true={}",
            result.params[2],
            true_params[2],
        );
        assert!(
            (result.params[3] - true_params[3]).abs() < 5e-3,
            "b1 fit={}, true={}",
            result.params[3],
            true_params[3],
        );
    }
2305}