Skip to main content

ssz/
sparse.rs

1//! `SparseList<T, N>` — a list view that may omit materializing parts of
2//! the tree, using cached subtree roots or zero-hashes for empty regions.
3//!
4//! The hash tree root is byte-identical to a fully-materialised
5//! `List<T, N>` with the same effective contents. The algorithm walks the
6//! implicit balanced binary tree of depth `ceil_log2(N)` iteratively,
7//! using a fixed-size stack — never materialising the full `N` leaves.
8//!
9//! ## Storage
10//!
11//! Both inner maps are sorted `Vec`s keyed by `u64`. Sorted Vec gives us:
12//!
13//! - **O(log n) lookup** via `binary_search_by_key`.
14//! - **O(n) insert/remove** at the sorted position. For the cnode-slot
15//!   use case (N ≤ 256, typically very sparse), the linear shift is
16//!   trivial.
17//! - **O(log n) range queries** via `partition_point`, used by
18//!   `compute_subtree_root` to short-circuit empty subtrees.
19//! - **Iteration in sorted order**, byte-equivalent to `BTreeMap::iter`.
20
21use alloc::vec::Vec;
22use core::fmt;
23use digest::Digest;
24use digest::typenum::U32;
25
26use crate::merkle::{ceil_log2, hash_pair, mix_in_length, zero_hash};
27use crate::missing::MissingOr;
28use crate::{BYTES_PER_LENGTH_OFFSET, Decode, DecodeError, Encode, HashTreeRoot};
29
30/// A list with a maximum length of `N` that exposes its tree structure
31/// for sparse fill-in: materialized indices, cached subtree roots, or
32/// implicit zero-hashes for never-written regions.
33///
34/// Hash is byte-identical to a fully-materialised `List<T, N>` with the
35/// same effective contents.
36pub struct SparseList<T, const N: u64> {
37    len: u64,
38    /// Sorted (by `u64` key) entries: leaf index → optional materialized
39    /// value (or precomputed hash). Absent indices contribute
40    /// `zero_hash(0)` to the root unless covered by
41    /// [`cached_subtree_roots`].
42    entries: Vec<(u64, MissingOr<T>)>,
43    /// Sorted (by `u64` key) cache of precomputed subtree roots. Key is
44    /// a tree coordinate `(depth, index_at_depth)` flattened via
45    /// `coord_to_key(depth, idx) = (1u64 << depth) | idx` — the standard
46    /// "heap index" of a node in a complete binary tree.
47    cached_subtree_roots: Vec<(u64, [u8; 32])>,
48}
49
50impl<T, const N: u64> SparseList<T, N> {
51    /// Build an empty sparse list.
52    pub fn new() -> Self {
53        Self {
54            len: 0,
55            entries: Vec::new(),
56            cached_subtree_roots: Vec::new(),
57        }
58    }
59
60    /// Logical length.
61    pub fn len(&self) -> u64 {
62        self.len
63    }
64
65    /// `true` iff no entries are present and `len == 0`.
66    pub fn is_empty(&self) -> bool {
67        self.len == 0
68    }
69
70    /// Iterator over `(index, MissingOr<T>)` for materialized entries only.
71    pub fn iter(&self) -> impl Iterator<Item = (u64, &MissingOr<T>)> {
72        self.entries.iter().map(|(k, v)| (*k, v))
73    }
74
75    /// Mutable iterator over `(index, &mut MissingOr<T>)` for materialized
76    /// entries only. Used by callers that need to rewrite entry values
77    /// in place (e.g., resolving `Ref` targets to `Hash` after settle).
78    pub fn iter_mut(&mut self) -> impl Iterator<Item = (u64, &mut MissingOr<T>)> {
79        self.entries.iter_mut().map(|(k, v)| (*k, v))
80    }
81
82    /// Number of materialized entries. Distinct from [`len`](Self::len),
83    /// which is the logical length (max index + 1).
84    pub fn entries_count(&self) -> usize {
85        self.entries.len()
86    }
87
88    /// Look up a single entry by leaf index. O(log n).
89    pub fn get(&self, idx: u64) -> Option<&MissingOr<T>> {
90        match self.entries.binary_search_by_key(&idx, |(k, _)| *k) {
91            Ok(pos) => Some(&self.entries[pos].1),
92            Err(_) => None,
93        }
94    }
95
96    /// Insert a materialized entry. Updates `len` to `max(len, idx + 1)`.
97    /// O(n) — sorted shift on insert. If `idx` is already present, the
98    /// existing value is overwritten (matching `BTreeMap::insert` semantics).
99    pub fn insert(&mut self, idx: u64, value: MissingOr<T>) -> Result<(), DecodeError> {
100        if idx >= N {
101            return Err(DecodeError::BoundExceeded {
102                len: idx + 1,
103                bound: N,
104            });
105        }
106        self.len = self.len.max(idx + 1);
107        match self.entries.binary_search_by_key(&idx, |(k, _)| *k) {
108            Ok(pos) => {
109                self.entries[pos].1 = value;
110            }
111            Err(pos) => {
112                self.entries.insert(pos, (idx, value));
113            }
114        }
115        Ok(())
116    }
117
118    /// Remove the entry at `idx`, returning its previous value if any.
119    /// Does **not** decrement `len` — the logical length is independent
120    /// of which indices are materialized.
121    pub fn remove(&mut self, idx: u64) -> Option<MissingOr<T>> {
122        match self.entries.binary_search_by_key(&idx, |(k, _)| *k) {
123            Ok(pos) => Some(self.entries.remove(pos).1),
124            Err(_) => None,
125        }
126    }
127
128    /// Set the logical length explicitly (does not affect entries).
129    pub fn set_len(&mut self, len: u64) -> Result<(), DecodeError> {
130        if len > N {
131            return Err(DecodeError::BoundExceeded { len, bound: N });
132        }
133        self.len = len;
134        Ok(())
135    }
136
137    /// Cache a precomputed subtree root at tree position `(depth, idx)`.
138    /// `depth == 0` corresponds to the root; deeper means closer to leaves.
139    /// O(n) — sorted insert into `cached_subtree_roots`.
140    pub fn cache_subtree_root(&mut self, depth: usize, idx: u64, root: [u8; 32]) {
141        let key = coord_to_key(depth, idx);
142        match self
143            .cached_subtree_roots
144            .binary_search_by_key(&key, |(k, _)| *k)
145        {
146            Ok(pos) => {
147                self.cached_subtree_roots[pos].1 = root;
148            }
149            Err(pos) => {
150                self.cached_subtree_roots.insert(pos, (key, root));
151            }
152        }
153    }
154
155    /// Number of cached subtree roots. Used by [`fmt::Debug`].
156    pub fn cached_subtree_roots_count(&self) -> usize {
157        self.cached_subtree_roots.len()
158    }
159
160    /// Iterator over cached subtree roots in sorted-by-key order.
161    pub fn cached_subtree_roots(&self) -> impl Iterator<Item = (u64, &[u8; 32])> {
162        self.cached_subtree_roots.iter().map(|(k, v)| (*k, v))
163    }
164
165    /// Internal: look up a cached subtree root by its `coord_to_key`-encoded key.
166    fn cached_subtree_root(&self, key: u64) -> Option<&[u8; 32]> {
167        match self
168            .cached_subtree_roots
169            .binary_search_by_key(&key, |(k, _)| *k)
170        {
171            Ok(pos) => Some(&self.cached_subtree_roots[pos].1),
172            Err(_) => None,
173        }
174    }
175}
176
177impl<T, const N: u64> Default for SparseList<T, N> {
178    fn default() -> Self {
179        Self::new()
180    }
181}
182
183#[inline]
184fn coord_to_key(depth: usize, idx: u64) -> u64 {
185    (1u64 << depth) | idx
186}
187
188impl<T: fmt::Debug, const N: u64> fmt::Debug for SparseList<T, N> {
189    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
190        f.debug_struct("SparseList")
191            .field("cap", &N)
192            .field("len", &self.len)
193            .field("materialized", &self.entries.len())
194            .field("cached_subtrees", &self.cached_subtree_roots.len())
195            .finish()
196    }
197}
198
199impl<T: Clone, const N: u64> Clone for SparseList<T, N> {
200    fn clone(&self) -> Self {
201        Self {
202            len: self.len,
203            entries: self.entries.clone(),
204            cached_subtree_roots: self.cached_subtree_roots.clone(),
205        }
206    }
207}
208
209impl<T: PartialEq, const N: u64> PartialEq for SparseList<T, N> {
210    fn eq(&self, other: &Self) -> bool {
211        if self.len != other.len
212            || self.entries.len() != other.entries.len()
213            || self.cached_subtree_roots.len() != other.cached_subtree_roots.len()
214        {
215            return false;
216        }
217        // Both vectors are sorted by key, so element-wise comparison
218        // suffices.
219        for ((ka, va), (kb, vb)) in self.entries.iter().zip(other.entries.iter()) {
220            if ka != kb || va != vb {
221                return false;
222            }
223        }
224        for ((ka, va), (kb, vb)) in self
225            .cached_subtree_roots
226            .iter()
227            .zip(other.cached_subtree_roots.iter())
228        {
229            if ka != kb || va != vb {
230                return false;
231            }
232        }
233        true
234    }
235}
236
237impl<T: Eq, const N: u64> Eq for SparseList<T, N> {}
238
239// --------------------------------------------------------------------------
240// Wire format: (len: u64, List<(u64, MissingOr<T>)>).
241// --------------------------------------------------------------------------
242//
243// The list element is an SSZ Container with a fixed `u64` key plus a
244// variable-length `MissingOr<T>` payload. We encode it inline rather than
245// going through `List<(u64, MissingOr<T>)>` to keep the wire format
246// independent of the workspace `(K, V)` tuple impl (which currently
247// doesn't exist; we have only the `BTreeMap<K, V>` impl in collections.rs).
248
249impl<T: Encode, const N: u64> Encode for SparseList<T, N> {
250    fn is_ssz_fixed_len() -> bool {
251        false
252    }
253    fn ssz_fixed_len() -> usize {
254        BYTES_PER_LENGTH_OFFSET
255    }
256    fn ssz_bytes_len(&self) -> usize {
257        // 8 (len) + 4 (entries-list offset) + offset table + payloads.
258        let n_entries = self.entries.len();
259        let entry_var_size: usize = self
260            .entries
261            .iter()
262            .map(|(_, v)| v.ssz_bytes_len())
263            .sum::<usize>();
264        // Each entry is (u64 key, MissingOr<T> value). u64 is fixed (8B);
265        // MissingOr<T> is variable, so each entry has a 4B offset slot.
266        // Plus the entries list itself is variable — we wrap it in an
267        // offset container with the leading `len: u64`.
268        // Layout:
269        //   [0..8]   len: u64
270        //   [8..12]  entries_offset: u32 (always 12)
271        //   [12..]   entries list payload
272        //
273        // entries list payload (variable list of variable elements):
274        //   per-entry: (u64 key, 4B value-offset)  ← 12 bytes each
275        //   then concatenated payloads
276        12 + n_entries * 12 + entry_var_size
277    }
278    fn ssz_append(&self, buf: &mut Vec<u8>) {
279        // len
280        buf.extend_from_slice(&self.len.to_le_bytes());
281        // entries_offset = 12
282        buf.extend_from_slice(&12u32.to_le_bytes());
283        // Now encode the entries list. SSZ list of container elements
284        // where the container is (u64 key, MissingOr<T> value).
285        encode_sparse_entries_list(&self.entries, buf);
286    }
287}
288
289fn encode_sparse_entries_list<T: Encode>(entries: &[(u64, MissingOr<T>)], buf: &mut Vec<u8>) {
290    let n = entries.len();
291    // Each entry container: (u64 key, MissingOr<T> value).
292    // Key is fixed (8B), value is variable. Per-entry container:
293    //   [0..8]: key
294    //   [8..12]: value-offset (= 12 → payload starts immediately)
295    //   [12..]: value payload
296    // So per-entry "fixed" portion is 12 bytes; variable portion is the
297    // MissingOr payload.
298    //
299    // To put this inside a list-of-variable-elements, we need a top-level
300    // offset table of `n` × 4 bytes pointing at each entry container,
301    // followed by the entry containers laid out back-to-back.
302
303    let header_size = n * BYTES_PER_LENGTH_OFFSET;
304    let start = buf.len();
305    buf.resize(start + header_size, 0u8);
306
307    let mut running = header_size as u32;
308    for (i, (key, val)) in entries.iter().enumerate() {
309        let off_pos = start + i * BYTES_PER_LENGTH_OFFSET;
310        buf[off_pos..off_pos + 4].copy_from_slice(&running.to_le_bytes());
311
312        let entry_start = buf.len();
313        // key
314        buf.extend_from_slice(&key.to_le_bytes());
315        // value-offset within this entry container
316        buf.extend_from_slice(&12u32.to_le_bytes());
317        // value payload
318        val.ssz_append(buf);
319
320        let entry_end = buf.len();
321        running = running
322            .checked_add((entry_end - entry_start) as u32)
323            .expect("ssz offset overflow");
324    }
325}
326
327impl<T: Decode, const N: u64> Decode for SparseList<T, N> {
328    fn is_ssz_fixed_len() -> bool {
329        false
330    }
331    fn ssz_fixed_len() -> usize {
332        BYTES_PER_LENGTH_OFFSET
333    }
334    fn from_ssz_bytes(bytes: &[u8]) -> Result<Self, DecodeError> {
335        if bytes.len() < 12 {
336            return Err(DecodeError::UnexpectedEof {
337                expected: 12,
338                actual: bytes.len(),
339            });
340        }
341        let mut len_bytes = [0u8; 8];
342        len_bytes.copy_from_slice(&bytes[0..8]);
343        let len = u64::from_le_bytes(len_bytes);
344        if len > N {
345            return Err(DecodeError::BoundExceeded { len, bound: N });
346        }
347        let entries_offset = u32::from_le_bytes(bytes[8..12].try_into().unwrap()) as usize;
348        if entries_offset != 12 {
349            return Err(DecodeError::InvalidOffset {
350                offset: entries_offset,
351                len: bytes.len(),
352                fixed: 12,
353            });
354        }
355        let payload = &bytes[12..];
356        let entries_in = decode_sparse_entries_list::<T>(payload)?;
357        let mut entries: Vec<(u64, MissingOr<T>)> = Vec::with_capacity(entries_in.len());
358        let mut prev_key: Option<u64> = None;
359        for (k, v) in entries_in {
360            if k >= N {
361                return Err(DecodeError::BoundExceeded {
362                    len: k + 1,
363                    bound: N,
364                });
365            }
366            if let Some(p) = prev_key
367                && k <= p
368            {
369                return Err(DecodeError::NotSorted);
370            }
371            prev_key = Some(k);
372            entries.push((k, v));
373        }
374        Ok(Self {
375            len,
376            entries,
377            cached_subtree_roots: Vec::new(),
378        })
379    }
380}
381
382fn decode_sparse_entries_list<T: Decode>(
383    bytes: &[u8],
384) -> Result<Vec<(u64, MissingOr<T>)>, DecodeError> {
385    if bytes.is_empty() {
386        return Ok(Vec::new());
387    }
388    if bytes.len() < 4 {
389        return Err(DecodeError::UnexpectedEof {
390            expected: 4,
391            actual: bytes.len(),
392        });
393    }
394    let first = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
395    if !first.is_multiple_of(BYTES_PER_LENGTH_OFFSET) || first > bytes.len() {
396        return Err(DecodeError::InvalidOffset {
397            offset: first,
398            len: bytes.len(),
399            fixed: 0,
400        });
401    }
402    let n = first / BYTES_PER_LENGTH_OFFSET;
403    let mut offsets = Vec::with_capacity(n + 1);
404    offsets.push(first);
405    for i in 1..n {
406        if bytes.len() < (i + 1) * BYTES_PER_LENGTH_OFFSET {
407            return Err(DecodeError::UnexpectedEof {
408                expected: (i + 1) * BYTES_PER_LENGTH_OFFSET,
409                actual: bytes.len(),
410            });
411        }
412        let off = u32::from_le_bytes(
413            bytes[i * BYTES_PER_LENGTH_OFFSET..(i + 1) * BYTES_PER_LENGTH_OFFSET]
414                .try_into()
415                .unwrap(),
416        ) as usize;
417        if off < *offsets.last().unwrap() {
418            return Err(DecodeError::OffsetsNotMonotonic {
419                prev: *offsets.last().unwrap(),
420                curr: off,
421            });
422        }
423        if off > bytes.len() {
424            return Err(DecodeError::InvalidOffset {
425                offset: off,
426                len: bytes.len(),
427                fixed: first,
428            });
429        }
430        offsets.push(off);
431    }
432    offsets.push(bytes.len());
433
434    let mut out = Vec::with_capacity(n);
435    for i in 0..n {
436        let entry_slice = &bytes[offsets[i]..offsets[i + 1]];
437        // Each entry: u64 key + value-offset (must be 12) + MissingOr<T> payload.
438        if entry_slice.len() < 12 {
439            return Err(DecodeError::UnexpectedEof {
440                expected: 12,
441                actual: entry_slice.len(),
442            });
443        }
444        let mut kbytes = [0u8; 8];
445        kbytes.copy_from_slice(&entry_slice[0..8]);
446        let key = u64::from_le_bytes(kbytes);
447        let value_offset = u32::from_le_bytes(entry_slice[8..12].try_into().unwrap()) as usize;
448        if value_offset != 12 {
449            return Err(DecodeError::InvalidOffset {
450                offset: value_offset,
451                len: entry_slice.len(),
452                fixed: 12,
453            });
454        }
455        let value = MissingOr::<T>::from_ssz_bytes(&entry_slice[12..])?;
456        out.push((key, value));
457    }
458    Ok(out)
459}
460
461// --------------------------------------------------------------------------
462// HashTreeRoot
463// --------------------------------------------------------------------------
464
465impl<T: HashTreeRoot + Encode, const N: u64> HashTreeRoot for SparseList<T, N> {
466    fn hash_tree_root<D: Digest<OutputSize = U32>>(&self) -> [u8; 32] {
467        // The chunk-tree depth is ceil_log2(N) — the depth at which there
468        // are exactly N leaves (one chunk per logical element, since we
469        // treat elements as composite types via `HashTreeRoot`).
470        // Special-case N <= 1 → depth 0 → root is the (zero or single)
471        // chunk.
472        let depth = ceil_log2(N);
473        let inner = self.compute_subtree_root::<D>(0, 0, depth);
474        mix_in_length::<D>(inner, self.len)
475    }
476}
477
478impl<T: HashTreeRoot, const N: u64> SparseList<T, N> {
479    /// Compute the merkle root of the subtree rooted at coordinate
480    /// `(node_depth, node_index_at_depth)` within a balanced binary chunk
481    /// tree of total depth `total_depth` (i.e., `2^total_depth` leaves).
482    ///
483    /// Uses iterative DFS with an explicit stack. Stack depth is bounded
484    /// by `total_depth` (≤ 64 for any u64 cap), so the stack is tiny.
485    fn compute_subtree_root<D: Digest<OutputSize = U32>>(
486        &self,
487        node_depth: usize,
488        node_index_at_depth: u64,
489        total_depth: usize,
490    ) -> [u8; 32] {
491        // Fast path: explicitly cached subtree root for this coordinate.
492        if let Some(cached) =
493            self.cached_subtree_root(coord_to_key(node_depth, node_index_at_depth))
494        {
495            return *cached;
496        }
497
498        // Leaf case: we're at the chunk level.
499        if node_depth == total_depth {
500            // Leaf index is `node_index_at_depth`. Return the chunk root.
501            return self
502                .get(node_index_at_depth)
503                .map(|e| e.hash_tree_root::<D>())
504                .unwrap_or([0u8; 32]);
505        }
506
507        // Determine the leaf-index range covered by this subtree.
508        let levels_below = total_depth - node_depth;
509        let leaves_per_subtree = 1u64 << levels_below;
510        let lo = node_index_at_depth * leaves_per_subtree;
511        let hi = lo + leaves_per_subtree; // exclusive
512
513        // If no materialized entries fall in [lo, hi), this subtree is
514        // entirely empty → it's a zero-hash at the appropriate depth.
515        // `partition_point` gives us the index of the first entry with
516        // key >= lo. If that entry's key is < hi, there's at least one
517        // materialized entry in range.
518        let pos = self.entries.partition_point(|(k, _)| *k < lo);
519        let has_entries = self.entries.get(pos).is_some_and(|(k, _)| *k < hi);
520        if !has_entries {
521            return zero_hash::<D>(levels_below);
522        }
523
524        // Recurse into children.
525        let left =
526            self.compute_subtree_root::<D>(node_depth + 1, node_index_at_depth * 2, total_depth);
527        let right = self.compute_subtree_root::<D>(
528            node_depth + 1,
529            node_index_at_depth * 2 + 1,
530            total_depth,
531        );
532        hash_pair::<D>(&left, &right)
533    }
534}