javm_cap/cache.rs
1//! `CacheDirectory<S>` — two-tier cap store.
2//!
3//! - **`blobs: HashMap<CapHash, Arc<Cap>>`** — content-addressed
4//! immutable caps. Pure cache: the host populates it; the kernel
5//! reads. If a lookup misses, the host hasn't published the cap yet.
6//!
7//! - **`instances: HashMap<u64, (CapRef, Arc<Cap>)>`** — identity-keyed
8//! mutable working state. The stored `CapRef` is the directory's
9//! self-reference; its `Arc::strong_count` is the number of live
10//! external holders + 1.
11//!
12//! Two callers exist: the Nub local backend (host's `Global`) and the
13//! Nub Hyperlight backend (guest's `Global` via talc). Both wrap
14//! `CacheDirectory<S>` in their own static / field; the directory's
15//! interior is `spin::Mutex`-protected so every public method takes
16//! `&self`.
17//!
18//! ## Cow + lazy promote
19//!
20//! Promotion (blob → instance) is a cheap `Arc::clone`:
21//!
22//! ```ignore
23//! let arc = blobs[&hash].clone(); // RC bump; no Cap copy.
24//! let id = self.next_ref;
25//! self.next_ref += 1;
26//! let capref = CapRef::new(id);
27//! instances.insert(id, (capref.clone(), arc)); // RC bump on capref.
28//! capref
29//! ```
30//!
31//! Mutation uses `Arc::make_mut`:
32//!
33//! ```ignore
34//! let mut arc = cache.get_instance(&capref).unwrap();
35//! let cap_mut = Arc::make_mut(&mut arc); // clones iff strong > 1.
36//! // ... mutate cap_mut ...
37//! cache.set_instance(&capref, arc);
38//! ```
39//!
40//! `Arc::make_mut` subsumes the legacy "sole-owner move-promote vs
41//! shared shallow-clone" branch — same decision, in fewer lines.
42//!
43//! ## GC sweep
44//!
45//! `sweep_instances` reclaims entries whose stored `CapRef.strong_count`
46//! is 1 (i.e., the directory is the sole holder). Removal drops the
47//! entry's `Arc<Cap>`; if that was the last strong ref to the Cap, the
48//! Cap drops; the Cap's `Ref(CapRef)` slot values drop too, decrementing
49//! more entries' refcounts. The sweep loops until a pass finds nothing.
50//!
51//! Cycles are structurally impossible (data-flow principle:
52//! `website/content/spec/discussions/data-flow-principle.md`), so the
53//! sweep is guaranteed to make forward progress.
54//!
55//! ## blob retention
56//!
57//! V0 blobs accumulate; the host pre-publishes every cap the invocation
58//! needs and lookups never miss. Future design: missing-blob lookups
59//! pause the kernel and ask the host to publish. Until that lands,
60//! `get_blob` returning `None` is treated as a hard failure by the
61//! caller.
62
63use alloc::sync::Arc;
64use alloc::vec::Vec;
65use core::hash::BuildHasher;
66
67use allocate::collections::{DefaultHashBuilder, HashMap};
68use spin::Mutex;
69
70use super::cap::{Cap, CapHash, CapHashOrRef, CapRef};
71use super::cap_hash::cap_hash;
72use super::image_cap::ImageConvertError;
73
74#[derive(Debug, thiserror::Error)]
75pub enum CacheError {
76 #[error("blob not found for hash")]
77 BlobMissing,
78 #[error("instance not found for ref {0}")]
79 InstanceMissing(u64),
80 #[error("image conversion failed: {0}")]
81 ImageConvertFailed(#[from] ImageConvertError),
82 #[error("paged data: page length mismatch (expected={expected}, got={got})")]
83 PageSizeMismatch { expected: u32, got: usize },
84 #[error("cnode slot index out of range")]
85 SlotOutOfRange,
86}
87
88pub struct CacheDirectory<S = DefaultHashBuilder> {
89 inner: Mutex<DirectoryInner<S>>,
90}
91
92struct DirectoryInner<S> {
93 blobs: HashMap<CapHash, Arc<Cap>, S>,
94 instances: HashMap<u64, (CapRef, Arc<Cap>), S>,
95 next_ref: u64,
96}
97
98impl CacheDirectory<DefaultHashBuilder> {
99 /// Construct an empty cache using the default per-process-randomized
100 /// hasher.
101 pub fn new() -> Self {
102 Self::with_hasher(DefaultHashBuilder::default())
103 }
104}
105
106impl Default for CacheDirectory<DefaultHashBuilder> {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
112impl<S: BuildHasher> CacheDirectory<S> {
113 /// Construct an empty cache with an explicit hasher.
114 pub fn with_hasher(hasher: S) -> Self
115 where
116 S: Clone,
117 {
118 Self {
119 inner: Mutex::new(DirectoryInner {
120 blobs: HashMap::with_hasher(hasher.clone()),
121 instances: HashMap::with_hasher(hasher),
122 // CapRef id 0 is reserved; ids start at 1.
123 next_ref: 1,
124 }),
125 }
126 }
127}
128
129impl<S> CacheDirectory<S> {
130 /// `const fn` constructor for static initialisation. Used by the
131 /// guest's `state_cache::CACHE` static. Takes both hashers
132 /// separately because `const fn` can't call `S::clone()` and not
133 /// every `BuildHasher` (notably `foldhash::fast::FixedState`)
134 /// implements `Copy`. Callers normally pass two identically-seeded
135 /// instances so both maps hash to the same buckets.
136 pub const fn new_const(blobs_hasher: S, instances_hasher: S) -> Self {
137 Self {
138 inner: Mutex::new(DirectoryInner {
139 blobs: HashMap::with_hasher(blobs_hasher),
140 instances: HashMap::with_hasher(instances_hasher),
141 next_ref: 1,
142 }),
143 }
144 }
145}
146
147impl<S: BuildHasher> CacheDirectory<S> {
148 /// Number of blob entries.
149 pub fn blob_count(&self) -> usize {
150 self.inner.lock().blobs.len()
151 }
152 /// Number of instance entries (live or unswept).
153 pub fn instance_count(&self) -> usize {
154 self.inner.lock().instances.len()
155 }
156
157 /// Whether the blobs tier holds an entry under `hash`.
158 pub fn contains_blob(&self, hash: &CapHash) -> bool {
159 self.inner.lock().blobs.contains_key(hash)
160 }
161
162 /// Get an `Arc::clone` of the blob cap at `hash`, or `None` if
163 /// absent.
164 pub fn get_blob(&self, hash: &CapHash) -> Option<Arc<Cap>> {
165 self.inner.lock().blobs.get(hash).cloned()
166 }
167
168 /// Get an `Arc::clone` of the instance cap at `capref`, or `None`
169 /// if absent.
170 pub fn get_instance(&self, capref: &CapRef) -> Option<Arc<Cap>> {
171 self.inner
172 .lock()
173 .instances
174 .get(&capref.id())
175 .map(|(_, arc)| arc.clone())
176 }
177
178 /// Snapshot the blob tier into a `Vec<(CapHash, Arc<Cap>)>`. Order
179 /// is unspecified (HashMap iteration); callers that need
180 /// deterministic order (state-root computations) sort by hash.
181 pub fn iter_blobs(&self) -> Vec<(CapHash, Arc<Cap>)> {
182 self.inner
183 .lock()
184 .blobs
185 .iter()
186 .map(|(h, arc)| (*h, arc.clone()))
187 .collect()
188 }
189
190 /// Polymorphic lookup that dispatches on the `CapHashOrRef` arm.
191 /// Returns an `Arc::clone` of the matching cap.
192 pub fn get(&self, key: CapHashOrRef) -> Option<Arc<Cap>> {
193 match key {
194 CapHashOrRef::Hash(h) => self.get_blob(&h),
195 CapHashOrRef::Ref(r) => self.get_instance(&r),
196 }
197 }
198
199 /// Replace the instance at `capref` with a fresh `Arc<Cap>`. The
200 /// old `Arc<Cap>` drops outside the lock (so any cascading
201 /// `Cap::drop → CapRef::drop` chain doesn't try to re-enter the
202 /// directory while we hold the guard).
203 pub fn set_instance(&self, capref: &CapRef, new_arc: Arc<Cap>) -> Result<(), CacheError> {
204 let _old = {
205 let mut g = self.inner.lock();
206 let entry = g
207 .instances
208 .get_mut(&capref.id())
209 .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
210 // Swap in the new Arc, keeping the existing self-ref CapRef.
211 // The old Arc<Cap> is returned and dropped after the lock guard.
212 core::mem::replace(&mut entry.1, new_arc)
213 };
214 Ok(())
215 }
216
217 /// Hash + insert into blobs. Idempotent: re-puts of identical
218 /// content are a no-op. Returns the content hash.
219 pub fn put_cap(&self, cap: &Cap) -> Result<CapHash, CacheError> {
220 let hash = cap_hash(cap);
221 self.put_cap_with_hash(hash, cap)?;
222 Ok(hash)
223 }
224
225 /// Pre-hashed insert. Debug-asserts the claimed hash matches the
226 /// cap; release trusts the caller (the SSZ merkleize is the hot
227 /// cost on the publish path).
228 pub fn put_cap_with_hash(&self, hash: CapHash, cap: &Cap) -> Result<(), CacheError> {
229 debug_assert_eq!(
230 cap_hash(cap),
231 hash,
232 "put_cap_with_hash: claimed hash does not match cap content",
233 );
234 let mut g = self.inner.lock();
235 g.blobs.entry(hash).or_insert_with(|| Arc::new(cap.clone()));
236 Ok(())
237 }
238
239 /// Insert a freshly-built `Cap` as a new instance entry. Returns
240 /// the `CapRef` handle. The directory keeps its own clone of the
241 /// returned handle internally as the entry's self-reference; when
242 /// all external clones drop, `sweep_instances` will reclaim the
243 /// entry.
244 pub fn put_instance(&self, cap: Cap) -> CapRef {
245 self.put_instance_arc(Arc::new(cap))
246 }
247
248 /// `put_instance` variant that takes a pre-built `Arc<Cap>`. Used
249 /// internally by [`Self::promote_blob_to_instance`] to share the
250 /// blob's Arc rather than deep-copying.
251 fn put_instance_arc(&self, arc: Arc<Cap>) -> CapRef {
252 let mut g = self.inner.lock();
253 let id = g.next_ref;
254 g.next_ref = g.next_ref.checked_add(1).expect("CapRef space exhausted");
255 let capref = CapRef::new(id);
256 g.instances.insert(id, (capref.clone(), arc));
257 capref
258 }
259
260 /// Lazily promote a blob to a fresh instance entry. The blob and
261 /// the new instance entry share the same `Arc<Cap>` (no Cap
262 /// data deep-copy); the next `Arc::make_mut` call on either side
263 /// clones-on-write if both still hold the Arc.
264 ///
265 /// Returns `None` if the blob isn't published.
266 pub fn promote_blob_to_instance(&self, hash: &CapHash) -> Option<CapRef> {
267 let arc = self.get_blob(hash)?;
268 Some(self.put_instance_arc(arc))
269 }
270
271 /// **GC pass.** Walk the instances tier and remove every entry
272 /// whose stored `CapRef` has `strong_count == 1` (the directory is
273 /// the sole holder — no external `CapRef` clone exists). Loop
274 /// until a pass finds nothing to remove, so cascading drops can
275 /// orphan more entries which then get reclaimed in the next
276 /// iteration.
277 ///
278 /// Cycles are structurally impossible (data-flow principle), so
279 /// the loop is guaranteed to terminate.
280 pub fn sweep_instances(&self) {
281 loop {
282 let dead: Vec<u64> = {
283 let g = self.inner.lock();
284 g.instances
285 .iter()
286 .filter(|(_, (sr, _))| sr.strong_count() == 1)
287 .map(|(k, _)| *k)
288 .collect()
289 };
290 if dead.is_empty() {
291 break;
292 }
293 for id in dead {
294 let _removed = {
295 let mut g = self.inner.lock();
296 g.instances.remove(&id)
297 };
298 // _removed drops here, outside the lock. If its
299 // Arc<Cap> was the last strong ref, Cap::drop runs and
300 // cascades: nested CapRef::drop calls decrement other
301 // entries' refcounts. Those entries get reclaimed on
302 // the next pass.
303 }
304 }
305 }
306
307 /// Settle a `Ref`-keyed working entry: walk nested `Ref` targets,
308 /// resolve each to a `Hash`, then hash the surviving cap. Non-
309 /// Instance entries (Data / CNode) graduate from `instances` to
310 /// `blobs` under the computed hash; Instance entries stay in
311 /// instances (they're the live mutable state) and the returned
312 /// hash is a snapshot identifier.
313 ///
314 /// For `Hash`-keyed input, returns it unchanged.
315 pub fn settle(&self, key: CapHashOrRef) -> Result<CapHash, CacheError> {
316 match key {
317 CapHashOrRef::Hash(h) => Ok(h),
318 CapHashOrRef::Ref(r) => self.settle_ref(&r),
319 }
320 }
321
322 fn settle_ref(&self, capref: &CapRef) -> Result<CapHash, CacheError> {
323 // Step 1: settle any nested Refs the cap holds, rewriting
324 // them to Hash in place.
325 loop {
326 // Pull the cap out as an owned Arc<Cap> so we can read it
327 // outside the lock.
328 let arc = self
329 .get_instance(capref)
330 .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
331 let nested = collect_ref_targets(&arc);
332 if nested.is_empty() {
333 break;
334 }
335 // Settle each nested ref. settle_ref recurses cleanly
336 // because nested refs are strictly downstream (data-flow
337 // principle).
338 let mut resolved: Vec<(CapRef, CapHash)> = Vec::with_capacity(nested.len());
339 for n in &nested {
340 let h = self.settle_ref(n)?;
341 resolved.push((n.clone(), h));
342 }
343 // Mutate the cap to rewrite the nested Refs to Hash. Need
344 // exclusive access to the Arc<Cap>; use Arc::make_mut on a
345 // fresh clone so the change is visible to other holders of
346 // the same id by replacing the directory's stored Arc.
347 let mut new_arc = arc.clone();
348 rewrite_ref_targets(Arc::make_mut(&mut new_arc), &resolved);
349 self.set_instance(capref, new_arc)?;
350 }
351
352 // Step 2: hash the (now Ref-free) cap.
353 let arc = self
354 .get_instance(capref)
355 .ok_or_else(|| CacheError::InstanceMissing(capref.id()))?;
356 let hash = cap_hash(&arc);
357
358 // Step 3: graduate non-Instance entries to blobs. Instance
359 // entries stay in instances (the snapshot hash is returned but
360 // the live cell isn't removed — the kernel may keep mutating).
361 let is_instance = matches!(&*arc, Cap::Instance(_));
362 if !is_instance {
363 // Insert into blobs if not already present; idempotent.
364 self.put_cap_with_hash(hash, &arc)?;
365 // Remove the instances entry. The local `capref`
366 // parameter still has one strong ref to it (the caller's);
367 // removing the directory's stored self-ref drops the
368 // entry's `Arc<Cap>` (one of two strong refs — the other
369 // is the local `arc` we're still holding). The cap data
370 // stays alive in `blobs` via the put_cap_with_hash above.
371 let _removed = {
372 let mut g = self.inner.lock();
373 g.instances.remove(&capref.id())
374 };
375 drop(_removed);
376 drop(arc);
377 }
378 Ok(hash)
379 }
380}
381
382// --- Helpers (free functions) ---
383
384/// Collect the directly-referenced `CapRef`s held by `cap`. Used by
385/// `settle` to know which sub-refs to resolve before hashing.
386fn collect_ref_targets(cap: &Cap) -> Vec<CapRef> {
387 let mut out: Vec<CapRef> = Vec::new();
388 match cap {
389 Cap::CNode(cn) => {
390 for (_, mo) in cn.slots.iter() {
391 if let ssz::MissingOr::Materialized(CapHashOrRef::Ref(r)) = mo {
392 out.push(r.clone());
393 }
394 }
395 }
396 Cap::Instance(inst) => {
397 if let CapHashOrRef::Ref(r) = &inst.root_cnode {
398 out.push(r.clone());
399 }
400 }
401 Cap::Data(_) | Cap::Image(_) | Cap::Type(_) => {}
402 }
403 out
404}
405
406/// Rewrite `cap`'s direct `CapHashOrRef::Ref(r)` targets to
407/// `CapHashOrRef::Hash(h)` according to the `(r, h)` mapping in
408/// `resolved`. Matches CapRef by id.
409fn rewrite_ref_targets(cap: &mut Cap, resolved: &[(CapRef, CapHash)]) {
410 let lookup = |r: &CapRef| -> Option<CapHash> {
411 resolved
412 .iter()
413 .find(|(k, _)| k.id() == r.id())
414 .map(|(_, h)| *h)
415 };
416 match cap {
417 Cap::CNode(cn) => {
418 for (_, mo) in cn.slots.iter_mut() {
419 if let ssz::MissingOr::Materialized(t) = mo
420 && let CapHashOrRef::Ref(r) = t
421 && let Some(h) = lookup(r)
422 {
423 *t = CapHashOrRef::Hash(h);
424 }
425 }
426 }
427 Cap::Instance(inst) => {
428 if let CapHashOrRef::Ref(r) = &inst.root_cnode
429 && let Some(h) = lookup(r)
430 {
431 inst.root_cnode = CapHashOrRef::Hash(h);
432 }
433 }
434 Cap::Data(_) | Cap::Image(_) | Cap::Type(_) => {}
435 }
436}