Skip to main content

javm_cap/
cache.rs

1//! `CacheDirectory<S>` — two-tier cap store.
2//!
3//! - **`blobs: HashMap<CapHash, Arc<Cap>>`** — content-addressed
4//!   immutable caps. Pure cache: the host populates it; the kernel
5//!   reads. If a lookup misses, the host hasn't published the cap yet.
6//!
7//! - **`instances: HashMap<u64, (CapRef, Arc<Cap>)>`** — identity-keyed
8//!   mutable working state. The stored `CapRef` is the directory's
9//!   self-reference; its `Arc::strong_count` is the number of live
10//!   external holders + 1.
11//!
12//! Two callers exist: the Nub local backend (host's `Global`) and the
13//! Nub Hyperlight backend (guest's `Global` via talc). Both wrap
14//! `CacheDirectory<S>` in their own static / field; the directory's
15//! interior is `spin::Mutex`-protected so every public method takes
16//! `&self`.
17//!
18//! ## Cow + lazy promote
19//!
20//! Promotion (blob → instance) is a cheap `Arc::clone`:
21//!
22//! ```ignore
23//! let arc = blobs[&hash].clone();        // RC bump; no Cap copy.
24//! let id = self.next_ref;
25//! self.next_ref += 1;
26//! let capref = CapRef::new(id);
27//! instances.insert(id, (capref.clone(), arc));   // RC bump on capref.
28//! capref
29//! ```
30//!
31//! Mutation uses `Arc::make_mut`:
32//!
33//! ```ignore
34//! let mut arc = cache.get_instance(&capref).unwrap();
35//! let cap_mut = Arc::make_mut(&mut arc);   // clones iff strong > 1.
36//! // ... mutate cap_mut ...
37//! cache.set_instance(&capref, arc);
38//! ```
39//!
40//! `Arc::make_mut` subsumes the legacy "sole-owner move-promote vs
41//! shared shallow-clone" branch — same decision, in fewer lines.
42//!
43//! ## GC sweep
44//!
45//! `sweep_instances` reclaims entries whose stored `CapRef.strong_count`
46//! is 1 (i.e., the directory is the sole holder). Removal drops the
47//! entry's `Arc<Cap>`; if that was the last strong ref to the Cap, the
48//! Cap drops; the Cap's `Ref(CapRef)` slot values drop too, decrementing
49//! more entries' refcounts. The sweep loops until a pass finds nothing.
50//!
51//! Cycles are structurally impossible (data-flow principle:
52//! `website/content/spec/discussions/data-flow-principle.md`), so the
53//! sweep is guaranteed to make forward progress.
54//!
55//! ## blob retention
56//!
57//! V0 blobs accumulate; the host pre-publishes every cap the invocation
58//! needs and lookups never miss. Future design: missing-blob lookups
59//! pause the kernel and ask the host to publish. Until that lands,
60//! `get_blob` returning `None` is treated as a hard failure by the
61//! caller.
62
63use alloc::sync::Arc;
64use alloc::vec::Vec;
65use core::hash::BuildHasher;
66
67use allocate::collections::{DefaultHashBuilder, HashMap};
68use spin::Mutex;
69
70use super::cap::{Cap, CapHash, CapHashOrRef, CapRef};
71use super::cap_hash::cap_hash;
72use super::image_cap::ImageConvertError;
73
74#[derive(Debug, thiserror::Error)]
75pub enum CacheError {
76    #[error("blob not found for hash")]
77    BlobMissing,
78    #[error("instance not found for ref {0}")]
79    InstanceMissing(u64),
80    #[error("image conversion failed: {0}")]
81    ImageConvertFailed(#[from] ImageConvertError),
82    #[error("paged data: page length mismatch (expected={expected}, got={got})")]
83    PageSizeMismatch { expected: u32, got: usize },
84    #[error("cnode slot index out of range")]
85    SlotOutOfRange,
86}
87
88pub struct CacheDirectory<S = DefaultHashBuilder> {
89    inner: Mutex<DirectoryInner<S>>,
90}
91
92struct DirectoryInner<S> {
93    blobs: HashMap<CapHash, Arc<Cap>, S>,
94    instances: HashMap<u64, (CapRef, Arc<Cap>), S>,
95    next_ref: u64,
96}
97
98impl CacheDirectory<DefaultHashBuilder> {
99    /// Construct an empty cache using the default per-process-randomized
100    /// hasher.
101    pub fn new() -> Self {
102        Self::with_hasher(DefaultHashBuilder::default())
103    }
104}
105
106impl Default for CacheDirectory<DefaultHashBuilder> {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112impl<S: BuildHasher> CacheDirectory<S> {
113    /// Construct an empty cache with an explicit hasher.
114    pub fn with_hasher(hasher: S) -> Self
115    where
116        S: Clone,
117    {
118        Self {
119            inner: Mutex::new(DirectoryInner {
120                blobs: HashMap::with_hasher(hasher.clone()),
121                instances: HashMap::with_hasher(hasher),
122                // CapRef id 0 is reserved; ids start at 1.
123                next_ref: 1,
124            }),
125        }
126    }
127}
128
129impl<S> CacheDirectory<S> {
130    /// `const fn` constructor for static initialisation. Used by the
131    /// guest's `state_cache::CACHE` static. Takes both hashers
132    /// separately because `const fn` can't call `S::clone()` and not
133    /// every `BuildHasher` (notably `foldhash::fast::FixedState`)
134    /// implements `Copy`. Callers normally pass two identically-seeded
135    /// instances so both maps hash to the same buckets.
136    pub const fn new_const(blobs_hasher: S, instances_hasher: S) -> Self {
137        Self {
138            inner: Mutex::new(DirectoryInner {
139                blobs: HashMap::with_hasher(blobs_hasher),
140                instances: HashMap::with_hasher(instances_hasher),
141                next_ref: 1,
142            }),
143        }
144    }
145}
146
147impl<S: BuildHasher> CacheDirectory<S> {
148    /// Number of blob entries.
149    pub fn blob_count(&self) -> usize {
150        self.inner.lock().blobs.len()
151    }
152    /// Number of instance entries (live or unswept).
153    pub fn instance_count(&self) -> usize {
154        self.inner.lock().instances.len()
155    }
156
157    /// Whether the blobs tier holds an entry under `hash`.
158    pub fn contains_blob(&self, hash: &CapHash) -> bool {
159        self.inner.lock().blobs.contains_key(hash)
160    }
161
162    /// Get an `Arc::clone` of the blob cap at `hash`, or `None` if
163    /// absent.
164    pub fn get_blob(&self, hash: &CapHash) -> Option<Arc<Cap>> {
165        self.inner.lock().blobs.get(hash).cloned()
166    }
167
168    /// Get an `Arc::clone` of the instance cap at `capref`, or `None`
169    /// if absent.
170    pub fn get_instance(&self, capref: &CapRef) -> Option<Arc<Cap>> {
171        self.inner
172            .lock()
173            .instances
174            .get(&capref.id())
175            .map(|(_, arc)| arc.clone())
176    }
177
178    /// Snapshot the blob tier into a `Vec<(CapHash, Arc<Cap>)>`. Order
179    /// is unspecified (HashMap iteration); callers that need
180    /// deterministic order (state-root computations) sort by hash.
181    pub fn iter_blobs(&self) -> Vec<(CapHash, Arc<Cap>)> {
182        self.inner
183            .lock()
184            .blobs
185            .iter()
186            .map(|(h, arc)| (*h, arc.clone()))
187            .collect()
188    }
189
190    /// Polymorphic lookup that dispatches on the `CapHashOrRef` arm.
191    /// Returns an `Arc::clone` of the matching cap.
192    pub fn get(&self, key: CapHashOrRef) -> Option<Arc<Cap>> {
193        match key {
194            CapHashOrRef::Hash(h) => self.get_blob(&h),
195            CapHashOrRef::Ref(r) => self.get_instance(&r),
196        }
197    }
198
199    /// Replace the instance at `capref` with a fresh `Arc<Cap>`. The
200    /// old `Arc<Cap>` drops outside the lock (so any cascading
201    /// `Cap::drop → CapRef::drop` chain doesn't try to re-enter the
202    /// directory while we hold the guard).
203    pub fn set_instance(&self, capref: &CapRef, new_arc: Arc<Cap>) -> Result<(), CacheError> {
204        let _old = {
205            let mut g = self.inner.lock();
206            let entry = g
207                .instances
208                .get_mut(&capref.id())
209                .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
210            // Swap in the new Arc, keeping the existing self-ref CapRef.
211            // The old Arc<Cap> is returned and dropped after the lock guard.
212            core::mem::replace(&mut entry.1, new_arc)
213        };
214        Ok(())
215    }
216
217    /// Hash + insert into blobs. Idempotent: re-puts of identical
218    /// content are a no-op. Returns the content hash.
219    pub fn put_cap(&self, cap: &Cap) -> Result<CapHash, CacheError> {
220        let hash = cap_hash(cap);
221        self.put_cap_with_hash(hash, cap)?;
222        Ok(hash)
223    }
224
225    /// Pre-hashed insert. Debug-asserts the claimed hash matches the
226    /// cap; release trusts the caller (the SSZ merkleize is the hot
227    /// cost on the publish path).
228    pub fn put_cap_with_hash(&self, hash: CapHash, cap: &Cap) -> Result<(), CacheError> {
229        debug_assert_eq!(
230            cap_hash(cap),
231            hash,
232            "put_cap_with_hash: claimed hash does not match cap content",
233        );
234        let mut g = self.inner.lock();
235        g.blobs.entry(hash).or_insert_with(|| Arc::new(cap.clone()));
236        Ok(())
237    }
238
239    /// Insert a freshly-built `Cap` as a new instance entry. Returns
240    /// the `CapRef` handle. The directory keeps its own clone of the
241    /// returned handle internally as the entry's self-reference; when
242    /// all external clones drop, `sweep_instances` will reclaim the
243    /// entry.
244    pub fn put_instance(&self, cap: Cap) -> CapRef {
245        self.put_instance_arc(Arc::new(cap))
246    }
247
248    /// `put_instance` variant that takes a pre-built `Arc<Cap>`. Used
249    /// internally by [`Self::promote_blob_to_instance`] to share the
250    /// blob's Arc rather than deep-copying.
251    fn put_instance_arc(&self, arc: Arc<Cap>) -> CapRef {
252        let mut g = self.inner.lock();
253        let id = g.next_ref;
254        g.next_ref = g.next_ref.checked_add(1).expect("CapRef space exhausted");
255        let capref = CapRef::new(id);
256        g.instances.insert(id, (capref.clone(), arc));
257        capref
258    }
259
260    /// Lazily promote a blob to a fresh instance entry. The blob and
261    /// the new instance entry share the same `Arc<Cap>` (no Cap
262    /// data deep-copy); the next `Arc::make_mut` call on either side
263    /// clones-on-write if both still hold the Arc.
264    ///
265    /// Returns `None` if the blob isn't published.
266    pub fn promote_blob_to_instance(&self, hash: &CapHash) -> Option<CapRef> {
267        let arc = self.get_blob(hash)?;
268        Some(self.put_instance_arc(arc))
269    }
270
271    /// **GC pass.** Walk the instances tier and remove every entry
272    /// whose stored `CapRef` has `strong_count == 1` (the directory is
273    /// the sole holder — no external `CapRef` clone exists). Loop
274    /// until a pass finds nothing to remove, so cascading drops can
275    /// orphan more entries which then get reclaimed in the next
276    /// iteration.
277    ///
278    /// Cycles are structurally impossible (data-flow principle), so
279    /// the loop is guaranteed to terminate.
280    pub fn sweep_instances(&self) {
281        loop {
282            let dead: Vec<u64> = {
283                let g = self.inner.lock();
284                g.instances
285                    .iter()
286                    .filter(|(_, (sr, _))| sr.strong_count() == 1)
287                    .map(|(k, _)| *k)
288                    .collect()
289            };
290            if dead.is_empty() {
291                break;
292            }
293            for id in dead {
294                let _removed = {
295                    let mut g = self.inner.lock();
296                    g.instances.remove(&id)
297                };
298                // _removed drops here, outside the lock. If its
299                // Arc<Cap> was the last strong ref, Cap::drop runs and
300                // cascades: nested CapRef::drop calls decrement other
301                // entries' refcounts. Those entries get reclaimed on
302                // the next pass.
303            }
304        }
305    }
306
307    /// Settle a `Ref`-keyed working entry: walk nested `Ref` targets,
308    /// resolve each to a `Hash`, then hash the surviving cap. Non-
309    /// Instance entries (Data / CNode) graduate from `instances` to
310    /// `blobs` under the computed hash; Instance entries stay in
311    /// instances (they're the live mutable state) and the returned
312    /// hash is a snapshot identifier.
313    ///
314    /// For `Hash`-keyed input, returns it unchanged.
315    pub fn settle(&self, key: CapHashOrRef) -> Result<CapHash, CacheError> {
316        match key {
317            CapHashOrRef::Hash(h) => Ok(h),
318            CapHashOrRef::Ref(r) => self.settle_ref(&r),
319        }
320    }
321
322    fn settle_ref(&self, capref: &CapRef) -> Result<CapHash, CacheError> {
323        // Step 1: settle any nested Refs the cap holds, rewriting
324        // them to Hash in place.
325        loop {
326            // Pull the cap out as an owned Arc<Cap> so we can read it
327            // outside the lock.
328            let arc = self
329                .get_instance(capref)
330                .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
331            let nested = collect_ref_targets(&arc);
332            if nested.is_empty() {
333                break;
334            }
335            // Settle each nested ref. settle_ref recurses cleanly
336            // because nested refs are strictly downstream (data-flow
337            // principle).
338            let mut resolved: Vec<(CapRef, CapHash)> = Vec::with_capacity(nested.len());
339            for n in &nested {
340                let h = self.settle_ref(n)?;
341                resolved.push((n.clone(), h));
342            }
343            // Mutate the cap to rewrite the nested Refs to Hash. Need
344            // exclusive access to the Arc<Cap>; use Arc::make_mut on a
345            // fresh clone so the change is visible to other holders of
346            // the same id by replacing the directory's stored Arc.
347            let mut new_arc = arc.clone();
348            rewrite_ref_targets(Arc::make_mut(&mut new_arc), &resolved);
349            self.set_instance(capref, new_arc)?;
350        }
351
352        // Step 2: hash the (now Ref-free) cap.
353        let arc = self
354            .get_instance(capref)
355            .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
356        let hash = cap_hash(&arc);
357
358        // Step 3: graduate non-Instance entries to blobs. Instance
359        // entries stay in instances (the snapshot hash is returned but
360        // the live cell isn't removed — the kernel may keep mutating).
361        let is_instance = matches!(&*arc, Cap::Instance(_));
362        if !is_instance {
363            // Insert into blobs if not already present; idempotent.
364            self.put_cap_with_hash(hash, &arc)?;
365            // Remove the instances entry. The local `capref`
366            // parameter still has one strong ref to it (the caller's);
367            // removing the directory's stored self-ref drops the
368            // entry's `Arc<Cap>` (one of two strong refs — the other
369            // is the local `arc` we're still holding). The cap data
370            // stays alive in `blobs` via the put_cap_with_hash above.
371            let _removed = {
372                let mut g = self.inner.lock();
373                g.instances.remove(&capref.id())
374            };
375            drop(_removed);
376            drop(arc);
377        }
378        Ok(hash)
379    }
380}
381
382// --- Helpers (free functions) ---
383
384/// Collect the directly-referenced `CapRef`s held by `cap`. Used by
385/// `settle` to know which sub-refs to resolve before hashing.
386fn collect_ref_targets(cap: &Cap) -> Vec<CapRef> {
387    let mut out: Vec<CapRef> = Vec::new();
388    match cap {
389        Cap::CNode(cn) => {
390            for (_, mo) in cn.slots.iter() {
391                if let ssz::MissingOr::Materialized(CapHashOrRef::Ref(r)) = mo {
392                    out.push(r.clone());
393                }
394            }
395        }
396        Cap::Instance(inst) => {
397            if let CapHashOrRef::Ref(r) = &inst.root_cnode {
398                out.push(r.clone());
399            }
400        }
401        Cap::Data(_) | Cap::Image(_) | Cap::Type(_) => {}
402    }
403    out
404}
405
406/// Rewrite `cap`'s direct `CapHashOrRef::Ref(r)` targets to
407/// `CapHashOrRef::Hash(h)` according to the `(r, h)` mapping in
408/// `resolved`. Matches CapRef by id.
409fn rewrite_ref_targets(cap: &mut Cap, resolved: &[(CapRef, CapHash)]) {
410    let lookup = |r: &CapRef| -> Option<CapHash> {
411        resolved
412            .iter()
413            .find(|(k, _)| k.id() == r.id())
414            .map(|(_, h)| *h)
415    };
416    match cap {
417        Cap::CNode(cn) => {
418            for (_, mo) in cn.slots.iter_mut() {
419                if let ssz::MissingOr::Materialized(t) = mo
420                    && let CapHashOrRef::Ref(r) = t
421                    && let Some(h) = lookup(r)
422                {
423                    *t = CapHashOrRef::Hash(h);
424                }
425            }
426        }
427        Cap::Instance(inst) => {
428            if let CapHashOrRef::Ref(r) = &inst.root_cnode
429                && let Some(h) = lookup(r)
430            {
431                inst.root_cnode = CapHashOrRef::Hash(h);
432            }
433        }
434        Cap::Data(_) | Cap::Image(_) | Cap::Type(_) => {}
435    }
436}