Skip to main content

nereids_io/
spectrum.rs

1//! Spectrum file parser for TOF/energy bin edges or centers.
2//!
3//! Parses CSV/TXT files containing TOF or energy values that define the
4//! spectral bins of a neutron imaging dataset.
5//!
6//! ## Supported formats
7//! - Single-column: one value per line
8//! - Two-column (CSV/TSV): first column used, rest ignored
9//! - Comment lines starting with `#` are skipped
10//! - First non-comment line skipped if it cannot be parsed as a number (header)
11
12use std::path::Path;
13
14use crate::error::IoError;
15
/// Physical quantity carried by the values of a spectrum file.
///
/// The parsers in this module never infer the unit; the caller picks the
/// variant that matches the file being loaded.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpectrumUnit {
    /// Time-of-flight bin edges/centers, expressed in microseconds.
    TofMicroseconds,
    /// Energy bin edges/centers, expressed in eV.
    EnergyEv,
}
24
/// How the values of a spectrum relate to the bins they describe.
///
/// For `N` bins, edges come as `N + 1` values while centers come as `N`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpectrumValueKind {
    /// Bin boundaries: `N + 1` values delimit `N` bins.
    BinEdges,
    /// Bin midpoints: one value per bin, `N` values for `N` bins.
    BinCenters,
}
33
34/// Parse a spectrum file from disk.
35///
36/// Returns the first column of numeric values, skipping comment and header lines.
37/// Supports comma, tab, and whitespace as delimiters.
38///
39/// # Assumptions
40///
41/// - **Column semantics**: only the first numeric column is extracted; any
42///   additional columns (e.g., counts, intensity) are silently ignored.
43/// - **Units are not inferred**: the caller must know whether values represent
44///   TOF in microseconds or energy in eV and set [`SpectrumUnit`] accordingly.
45/// - **Malformed lines**: comment lines (`#`-prefixed) and blank lines are
46///   skipped.  The first non-comment, non-numeric line is treated as a header
47///   and skipped; a second such line is a hard error.
48/// - **Non-finite values** (NaN, Inf) produce a hard error.
49/// - **Minimum length**: at least 2 values are required.
50pub fn parse_spectrum_file(path: &Path) -> Result<Vec<f64>, IoError> {
51    let content = std::fs::read_to_string(path)
52        .map_err(|e| IoError::FileNotFound(path.to_string_lossy().into_owned(), e))?;
53    parse_spectrum_text(&content)
54}
55
56/// Parse spectrum values from a string.
57///
58/// Extracts the first numeric column. Lines starting with `#` are comments.
59/// The first non-comment line that cannot be parsed as a number is treated
60/// as a header and skipped (only one such line is allowed).
61pub fn parse_spectrum_text(text: &str) -> Result<Vec<f64>, IoError> {
62    let mut values = Vec::new();
63    let mut skipped_header = false;
64
65    for line in text.lines() {
66        let trimmed = line.trim();
67        if trimmed.is_empty() || trimmed.starts_with('#') {
68            continue;
69        }
70        // Extract first token (split by comma, tab, or whitespace)
71        let first_token = trimmed
72            .split(|c: char| c == ',' || c == '\t' || c.is_ascii_whitespace())
73            .next()
74            .unwrap_or("")
75            .trim();
76
77        match first_token.parse::<f64>() {
78            Ok(val) => {
79                if !val.is_finite() {
80                    return Err(IoError::InvalidParameter(format!(
81                        "Non-finite value in spectrum file: {}",
82                        val
83                    )));
84                }
85                values.push(val);
86            }
87            Err(_) => {
88                if !skipped_header && values.is_empty() {
89                    skipped_header = true;
90                    continue;
91                }
92                return Err(IoError::InvalidParameter(format!(
93                    "Unparseable value in spectrum file: '{}'",
94                    first_token
95                )));
96            }
97        }
98    }
99
100    if values.len() < 2 {
101        return Err(IoError::InvalidParameter(
102            "Spectrum file must contain at least 2 values".into(),
103        ));
104    }
105
106    Ok(values)
107}
108
109/// Validate that spectrum values are compatible with the TIFF frame count.
110///
111/// For bin edges: `n_values == n_frames + 1`.
112/// For bin centers: `n_values == n_frames`.
113pub fn validate_spectrum_frame_count(
114    n_values: usize,
115    n_frames: usize,
116    kind: SpectrumValueKind,
117) -> Result<(), IoError> {
118    let expected = match kind {
119        SpectrumValueKind::BinEdges => n_frames + 1,
120        SpectrumValueKind::BinCenters => n_frames,
121    };
122    if n_values != expected {
123        return Err(IoError::InvalidParameter(format!(
124            "Spectrum has {} values but TIFF has {} frames (expected {} for {:?})",
125            n_values, n_frames, expected, kind,
126        )));
127    }
128    Ok(())
129}
130
131/// Validate that values are strictly monotonically increasing.
132pub fn validate_monotonic(values: &[f64]) -> Result<(), IoError> {
133    for window in values.windows(2) {
134        match window[0].partial_cmp(&window[1]) {
135            Some(std::cmp::Ordering::Less) => {} // strictly increasing — OK
136            _ => {
137                // Equal, decreasing, or NaN (partial_cmp returns None)
138                return Err(IoError::InvalidParameter(format!(
139                    "Spectrum values must be strictly increasing, but found {} >= {}",
140                    window[0], window[1],
141                )));
142            }
143        }
144    }
145    Ok(())
146}
147
/// Unit tests for the spectrum text parser and the validation helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_spectrum_text: accepted layouts ----

    #[test]
    fn test_parse_single_column() {
        let parsed = parse_spectrum_text("1000.0\n2000.0\n3000.0\n4000.0\n").unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0, 4000.0]);
    }

    #[test]
    fn test_parse_two_column_csv() {
        let parsed = parse_spectrum_text("1000.0,0.5\n2000.0,0.6\n3000.0,0.7\n").unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0]);
    }

    #[test]
    fn test_parse_whitespace_separated() {
        let parsed = parse_spectrum_text("1000.0  0.5\n2000.0  0.6\n3000.0  0.7\n").unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0]);
    }

    #[test]
    fn test_parse_comments_and_header() {
        // Two comment lines, one header line, then three data rows.
        let input = concat!(
            "# This is a comment\n",
            "# Another comment\n",
            "TOF_us, intensity\n",
            "1000.0, 0.5\n",
            "2000.0, 0.6\n",
            "3000.0, 0.7\n",
        );
        let parsed = parse_spectrum_text(input).unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0]);
    }

    #[test]
    fn test_parse_tab_separated() {
        let parsed = parse_spectrum_text("1000.0\t0.5\n2000.0\t0.6\n3000.0\t0.7\n").unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0]);
    }

    #[test]
    fn test_parse_empty_lines_ignored() {
        let parsed = parse_spectrum_text("\n1000.0\n\n2000.0\n\n3000.0\n\n").unwrap();
        assert_eq!(parsed, vec![1000.0, 2000.0, 3000.0]);
    }

    // ---- parse_spectrum_text: rejected inputs ----

    #[test]
    fn test_parse_too_few_values() {
        let err = parse_spectrum_text("1000.0\n").unwrap_err();
        assert!(
            err.to_string().contains("at least 2"),
            "Expected 'at least 2' error"
        );
    }

    #[test]
    fn test_parse_non_finite_value() {
        let err = parse_spectrum_text("1000.0\nNaN\n3000.0\n").unwrap_err();
        assert!(
            err.to_string().contains("Non-finite"),
            "Expected non-finite error"
        );
    }

    #[test]
    fn test_parse_unparseable_after_data() {
        let err = parse_spectrum_text("1000.0\n2000.0\nbad_value\n").unwrap_err();
        assert!(
            err.to_string().contains("Unparseable"),
            "Expected unparseable error"
        );
    }

    // ---- validate_spectrum_frame_count ----

    #[test]
    fn test_validate_frame_count_edges() {
        // 5 frames are delimited by exactly 6 edges.
        let edges = SpectrumValueKind::BinEdges;
        assert!(validate_spectrum_frame_count(6, 5, edges).is_ok());
        assert!(validate_spectrum_frame_count(5, 5, edges).is_err());
        assert!(validate_spectrum_frame_count(7, 5, edges).is_err());
    }

    #[test]
    fn test_validate_frame_count_centers() {
        // 5 frames have exactly 5 centers.
        let centers = SpectrumValueKind::BinCenters;
        assert!(validate_spectrum_frame_count(5, 5, centers).is_ok());
        assert!(validate_spectrum_frame_count(6, 5, centers).is_err());
    }

    // ---- validate_monotonic ----

    #[test]
    fn test_validate_monotonic_ok() {
        assert!(validate_monotonic(&[1.0, 2.0, 3.0, 4.0]).is_ok());
    }

    #[test]
    fn test_validate_monotonic_equal() {
        assert!(validate_monotonic(&[1.0, 2.0, 2.0, 4.0]).is_err());
    }

    #[test]
    fn test_validate_monotonic_decreasing() {
        assert!(validate_monotonic(&[1.0, 3.0, 2.0, 4.0]).is_err());
    }

    #[test]
    fn test_validate_monotonic_nan() {
        let outcome = validate_monotonic(&[1.0, f64::NAN, 3.0]);
        assert!(outcome.is_err(), "NaN should fail monotonicity check");
    }

    // ---- parse_spectrum_file ----

    #[test]
    fn test_parse_spectrum_file_not_found() {
        let missing = Path::new("/nonexistent/spectrum.csv");
        assert!(parse_spectrum_file(missing).is_err());
    }
}