javm_cap/cap/image.rs
1//! `ImageCap` — Image cap.
2//!
3//! Stores a single code region, endpoints, mappings, and slot references as
4//! separate `Vec<T>` allocations. Allocation count per ImageCap is
5//! bounded regardless of content size; we accept that in exchange for
6//! direct field accessors.
7
8use alloc::vec::Vec;
9
10use crate::slot::{Key, SlotPath};
11
12use super::{CapHash, MAX_SOURCE_DEPTH, NUM_REGS};
13
14/// # Validation model: structure is eager, semantics are lazy
15///
16/// An `ImageCap` is admitted from untrusted input under a two-layer
17/// discipline:
18///
19/// - **Structure — validated eagerly** (here / in [`image_cap`], the
20/// "deblob"). The metadata that frames execution: `code` *length*
21/// (`≤ MAX_CODE_SIZE`), memory-mapping bounds, slot indices, source-path
22/// depth, endpoint indices. A malformed structural field has no clean
23/// execution point to fault on — it would diverge between engines or
24/// panic the host — so it is rejected at construction. This is cheap
25/// (`O(#endpoints + #mappings + #slots)`, it never scans the code) and
26/// therefore compatible with lazy compilation.
27///
28/// - **Semantics — validated lazily** (at execution, by both engines
29/// identically). The instruction stream itself: illegal/forbidden
30/// encodings, and `jal`/branch/`jalr`/`entry_pc` targets. These are
31/// **not** rejected at admission — any `code` bytes are accepted. A
32/// forbidden encoding decodes as illegal and an off-`bb_start` target is
33/// refused only *when reached*, as `ε = panic`. Lazy (not eager
34/// deblob) because, without an instruction bitmask, a linear validator
35/// can't tell code from data — eager rejection would reject legitimate
36/// code-as-data; lazy also keeps admission version-independent (a future
37/// ISA extension forks only at execution, never the cap set at
38/// admission) and preserves lazy compilation. The consensus requirement
39/// is that the two engines *agree* on what panics, not that the bytes
40/// are pre-screened. The producer toolchain still rejects forbidden
41/// encodings at build time as a diagnostic — that is UX, not a
42/// consensus rule.
// `Clone` is deliberately not derived: the manual `impl Clone for ImageCap`
// in this file re-allocates `code` page-aligned instead of using `Vec::clone`.
// NOTE(review): presumably the field order below is part of the derived
// hash-tree-root / archived form — confirm before reordering fields.
#[derive(Debug, ssz_derive::HashTreeRoot, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub struct ImageCap {
    /// The (single) code region: raw RV+C+custom-0 bytes, page-aligned
    /// so the kernel can direct-map it RO at the fixed protocol
    /// constant [`crate::layout::CODE_BASE`]. Empty for codeless
    /// images. See [`ImageCap::code_mapping`].
    ///
    /// Only the *length* is bounded at admission (`≤ MAX_CODE_SIZE`); the
    /// byte contents are accepted as-is.
    pub code: Vec<u8>,
    /// Endpoint definitions, keyed by a [`Key`] selector. A sparse, sorted
    /// association list (`Dict`-style — kept sorted by key, no fixed capacity);
    /// an absent key is an undefined endpoint. There is no dense array and no
    /// `entry_pc == 0` sentinel, so an endpoint may legitimately start at code
    /// offset 0. (`Vec<(Key, _)>` rather than `BTreeMap` because the rkyv wire
    /// form has no `Ord` on the archived key.)
    pub endpoints: Vec<(Key, EndpointDef)>,
    /// Memory mappings: the guest regions laid at instance start, each naming
    /// the cnode path of the `Cap::Data` that backs it. A mapping stores no
    /// permission of its own; whether it is laid read-only is derived from
    /// [`Self::pinned`] — see [`ImageCap::mapping_is_pinned`].
    pub mappings: Vec<MemoryMapping>,
    /// Pinned read-only slots (Cap::Data / Cap::Image). Images only
    /// ever reference content-addressed caps, so the target is a
    /// plain `CapHash`. [`image_cap`] stores these sorted by slot key.
    pub pinned: Vec<ImageSlotEntry>,
    /// Initial mutable slot state for non-pinned slots. [`image_cap`] stores
    /// these sorted by slot key.
    pub initial: Vec<ImageSlotEntry>,
    /// Slot holding `Cap::Instance[YieldReceiver]` (the catch-set), if any.
    pub yield_receiver_slot: Option<Key>,
    /// Cnode slots holding the `Cap::Instance[Gas{meter_key}]` unit handles,
    /// consulted in order. See [`crate::image::Image::gas_slots`].
    pub gas_slots: Vec<Key>,
    /// Cnode slots holding the `Cap::Instance[Quota{quota_key}]` unit handles.
    /// Carried through from the source image unchanged by [`image_cap`].
    pub quota_slots: Vec<Key>,
}
73
74// Manual Clone: the derived impl would `Vec::clone` the `code` bytes,
75// which goes through `Global::alloc` at the default 1-byte alignment
76// for `[u8]`. The kernel direct-maps the code region into a ring-3 PT
77// and asserts `phys.is_multiple_of(PAGE_SIZE)`, so a cloned buffer on
78// an unaligned page would panic. Re-allocate `code` through
79// `alloc_page_aligned_code` to preserve the invariant across clones
80// (mirrors `DataContent`'s manual Clone). Other fields clone normally.
81impl Clone for ImageCap {
82 fn clone(&self) -> Self {
83 Self {
84 code: alloc_page_aligned_code(&self.code),
85 endpoints: self.endpoints.clone(),
86 mappings: self.mappings.clone(),
87 pinned: self.pinned.clone(),
88 initial: self.initial.clone(),
89 yield_receiver_slot: self.yield_receiver_slot.clone(),
90 gas_slots: self.gas_slots.clone(),
91 quota_slots: self.quota_slots.clone(),
92 }
93 }
94}
95
96impl ImageCap {
97 /// The executable code region as `(code_base, bytes)`. `code_base`
98 /// is the fixed protocol constant [`crate::layout::CODE_BASE`], so a
99 /// PVM PC is `code_base + byte_offset`. `None` if the image declares
100 /// no code (empty region) — such an image cannot execute.
101 pub fn code_mapping(&self) -> Option<(u32, &[u8])> {
102 if self.code.is_empty() {
103 return None;
104 }
105 Some((crate::layout::CODE_BASE, self.code.as_slice()))
106 }
107
108 /// True iff the memory mapping starting at guest VA `start` draws
109 /// from a pinned (read-only) slot, so it must be laid read-only — a
110 /// guest store to it faults. Mirrors the recompiler's pinned-vs-
111 /// initial slot classification (`nub-arch-x86` `build_runtime`).
112 /// Derived from [`Self::pinned`] at lay time, so a mapping carries no
113 /// per-mapping permission field; the interpreter drivers (`javm`
114 /// `build_entry`, `nub-arch-local`) call this so they classify
115 /// identically to the recompiler.
116 pub fn mapping_is_pinned(&self, start: u32) -> bool {
117 self.mappings.iter().any(|m| {
118 m.start as u32 == start
119 && m.source
120 .steps()
121 .first()
122 .is_some_and(|root| self.pinned.iter().any(|p| &p.slot == root))
123 })
124 }
125}
126
/// Endpoint definition. Dense `initial_regs` array; index `i`
/// corresponds to PVM register `φ[i]`. `0` is "use default" (same
/// semantics as the spec's old `BTreeMap<u8, u64>` when the key is
/// absent).
///
/// An endpoint is identified by the [`Key`] it is stored under in
/// [`ImageCap::endpoints`]; the definition itself carries no key.
// `Key` is heap-spillable (`SmallVec`), so `EndpointDef` is no longer
// `Copy`; it threads through the cap layer by value/clone like the other
// `Key`-bearing structs.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct EndpointDef {
    /// Entry program counter of the endpoint. `0` is a legitimate value, not
    /// an "undefined" sentinel. The target is not checked at admission; a bad
    /// target is refused only when execution reaches it (see the validation
    /// model documented on [`ImageCap`]).
    pub entry_pc: u64,
    /// Initial stack top. [`image_cap`] fills it from the source endpoint's
    /// `initial_regs[1]`, or `0` when that register is not set.
    pub stack_top: u64,
    /// Cnode slot key associated with the endpoint's arguments.
    /// [`image_cap`] always sets this to `Key::from(0u8)`.
    // NOTE(review): presumably the slot that receives the argument cnode —
    // the consumer is not visible in this file; confirm.
    pub arg_cnode_slot: Key,
    /// Copied verbatim from the source image endpoint by [`image_cap`].
    // NOTE(review): presumably the size of the argument cnode; unit and
    // encoding are not visible in this file — confirm.
    pub arg_cnode_size: u8,
    /// Initial register file: entry `i` is the starting value of `φ[i]`;
    /// registers the source image did not set are `0`.
    pub initial_regs: [u64; NUM_REGS],
}
153
/// One mapped region. The kernel resolves `source` (a [`SlotPath`] to a
/// `Cap::Data`) at instance start, reads the bytes, and lays them at
/// `[start, start + size)`.
///
/// `source` is a variable-length [`SlotPath`] (was a fixed `[SlotIdx;
/// MAX_SOURCE_DEPTH]` + length), so `MemoryMapping` is now a
/// variable-length SSZ container with a fully derived codec — no hand-rolled
/// SSZ. The eager depth bound (`≤ MAX_SOURCE_DEPTH`) is enforced in
/// [`image_cap`] at deblob, not in the wire decode.
///
/// A mapping has no permission field: whether it is laid read-only is
/// derived from the image's pinned slots (see
/// [`ImageCap::mapping_is_pinned`]).
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct MemoryMapping {
    /// Guest VA of the first byte of the region. Stored as `u64`;
    /// [`ImageCap::mapping_is_pinned`] compares it against a `u32` guest VA
    /// after an `as u32` cast.
    pub start: u64,
    /// Length of the region in bytes; the region is `[start, start + size)`.
    pub size: u64,
    /// Cnode path resolving to the `Cap::Data` whose bytes back this region.
    /// [`image_cap`] rejects paths that are empty or longer than
    /// `MAX_SOURCE_DEPTH`.
    pub source: SlotPath,
}
181
182impl MemoryMapping {
183 /// The cnode path steps — the keys to walk to the `Cap::Data` backing
184 /// this mapping. Non-empty for a well-formed mapping.
185 pub fn path(&self) -> &[Key] {
186 self.source.steps()
187 }
188}
189
/// `(slot_key, cap_hash)` pair used by Image's `pinned` and
/// `initial` arrays. References content-addressed caps only.
///
/// `Key` is heap-spillable, so this is no longer `Copy`.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct ImageSlotEntry {
    /// Key of the cnode slot this entry populates. The `pinned` / `initial`
    /// lists built by [`image_cap`] are ordered by this key.
    pub slot: Key,
    /// Hash identifying the content-addressed cap placed in `slot`.
    pub cap_hash: CapHash,
}
210
/// Failure modes when converting an SSZ-encoded [`crate::image::Image`]
/// into an [`ImageCap`] (see [`image_cap`]). The conversion carries over the
/// slots and metadata that remain cap-resident and drops only the
/// per-endpoint `arg_registers`; the remaining fields must satisfy
/// structural constraints, and each variant reports one violated constraint.
#[derive(Debug, thiserror::Error)]
pub enum ImageConvertError {
    /// The declared code length (first field) is larger than
    /// `crate::layout::MAX_CODE_SIZE` (second field).
    #[error("code region {0} bytes exceeds MAX_CODE_SIZE ({1})")]
    CodeTooLarge(usize, u32),
    /// The code window does not fit inside the arena. Fields are the code's
    /// arena offset, its declared length, and the arena length in bytes.
    #[error("code ref [{0}, {0}+{1}) out of arena bounds (arena {2} bytes)")]
    CodeRefOutOfRange(u32, u32, usize),
    /// A pinned or initial data descriptor failed its own validation against
    /// the arena length; the underlying error is carried unchanged.
    #[error("data desc invalid: {0:?}")]
    DataDesc(crate::image::DataDescError),
    /// A memory mapping's source path has no steps.
    #[error("memory mapping source path empty")]
    SourcePathEmpty,
    /// A memory mapping's source path has more than `MAX_SOURCE_DEPTH`
    /// steps; the field is the offending step count.
    #[error("memory mapping source path too deep (steps={0} > MAX_SOURCE_DEPTH)")]
    SourcePathTooDeep(usize),
    /// An endpoint sets an initial register whose index (the field) is not
    /// below `NUM_REGS`.
    #[error("register index {0} >= NUM_REGS")]
    RegisterIndexOutOfRange(u8),
}
230
231/// Build an [`ImageCap`] from the SSZ-encoded [`crate::image::Image`]
232/// shape. The Data content referenced by pinned and initial slots must
233/// already be published — pass the resolved `(SlotIdx, CapHash)` pairs
234/// in `pinned_hashes` and `initial_hashes`. The builder sorts both lists
235/// by slot index.
236///
237/// **Lossy fields (intentionally dropped):**
238/// - per-endpoint `arg_registers`: the calling convention is implicit
239/// in the new shape. `gas_slots` and `quota_slots` remain image
240/// metadata and are carried through unchanged.
241///
242/// **Field mappings:**
243/// - Endpoints are stored in a sparse `Key -> EndpointDef` map (no fixed
244/// capacity). `stack_top` is extracted from the old `initial_regs[1]`
245/// (RISC-V SP convention); `arg_cnode_slot` defaults to `Key::from(0)`.
246/// - `MemoryMapping.source` (a [`SlotPath`]) is carried through verbatim;
247/// paths that are empty or deeper than `MAX_SOURCE_DEPTH` error.
248pub fn image_cap(
249 image: &crate::image::Image,
250 pinned_hashes: &[(Key, CapHash)],
251 initial_hashes: &[(Key, CapHash)],
252) -> Result<ImageCap, ImageConvertError> {
253 // Structural invariant (eager): the code region maps RO at
254 // `[CODE_BASE, DATA_BASE)`, so it must fit under `MAX_CODE_SIZE` —
255 // otherwise a high code offset would alias the data region. The
256 // *contents* of `code` are not validated here (instruction legality
257 // is checked lazily, at execution); only its size is a structural
258 // bound. Checked before the page-aligned copy so an oversized blob
259 // is rejected without allocating it.
260 let code_len = image.code.len as usize;
261 if code_len > crate::layout::MAX_CODE_SIZE as usize {
262 return Err(ImageConvertError::CodeTooLarge(
263 code_len,
264 crate::layout::MAX_CODE_SIZE,
265 ));
266 }
267 // The code window `[arena_off, arena_off + len)` must lie within the
268 // arena (untrusted wire input — fail loud, never slice out of range).
269 let code_in_bounds = (image.code.arena_off as usize)
270 .checked_add(code_len)
271 .is_some_and(|end| end <= image.arena.len());
272 if !code_in_bounds {
273 return Err(ImageConvertError::CodeRefOutOfRange(
274 image.code.arena_off,
275 image.code.len,
276 image.arena.len(),
277 ));
278 }
279 // Every pinned/initial data descriptor must reference the arena
280 // soundly (page-aligned, in-bounds, page_index < page_count, canonical
281 // page order) before any downstream materialization slices the arena.
282 for slot in image.pinned_slots.values() {
283 if let crate::image::PinnedCap::Data { desc } = slot {
284 desc.validate(image.arena.len())
285 .map_err(ImageConvertError::DataDesc)?;
286 }
287 }
288 for desc in image.initial_slots.values() {
289 desc.validate(image.arena.len())
290 .map_err(ImageConvertError::DataDesc)?;
291 }
292 // Code: page-aligned copy so the kernel can direct-map it RO at
293 // `layout::CODE_BASE`.
294 let code = alloc_page_aligned_code(image.code_bytes());
295
296 // Endpoints: a sparse, sorted `Key -> EndpointDef` association list (no
297 // fixed capacity, no dense `entry_pc == 0` sentinel — presence is what
298 // defines an endpoint). `image.endpoints` is a BTreeMap, so iterating it
299 // yields keys in sorted order and the resulting Vec stays sorted by Key.
300 let mut endpoints = Vec::with_capacity(image.endpoints.len());
301 for (key, ep) in &image.endpoints {
302 let mut initial_regs = [0u64; NUM_REGS];
303 for (®_idx, &val) in &ep.initial_regs {
304 if (reg_idx as usize) >= NUM_REGS {
305 return Err(ImageConvertError::RegisterIndexOutOfRange(reg_idx));
306 }
307 initial_regs[reg_idx as usize] = val;
308 }
309 // RISC-V SP convention: φ[1] = stack pointer.
310 let stack_top = ep.initial_regs.get(&1).copied().unwrap_or(0);
311 endpoints.push((
312 key.clone(),
313 EndpointDef {
314 entry_pc: ep.entry_pc,
315 stack_top,
316 arg_cnode_slot: Key::from(0u8),
317 arg_cnode_size: ep.arg_cnode_size,
318 initial_regs,
319 },
320 ));
321 }
322
323 let mut mappings = Vec::with_capacity(image.memory_mappings.len());
324 for m in &image.memory_mappings {
325 let steps = m.source.steps();
326 if steps.is_empty() {
327 return Err(ImageConvertError::SourcePathEmpty);
328 }
329 if steps.len() > MAX_SOURCE_DEPTH {
330 return Err(ImageConvertError::SourcePathTooDeep(steps.len()));
331 }
332 mappings.push(MemoryMapping {
333 start: m.start,
334 size: m.size,
335 source: m.source.clone(),
336 });
337 }
338
339 let pinned = build_image_slot_vec(pinned_hashes);
340 let initial = build_image_slot_vec(initial_hashes);
341
342 Ok(ImageCap {
343 code,
344 endpoints,
345 mappings,
346 pinned,
347 initial,
348 yield_receiver_slot: image.yield_receiver_slot.clone(),
349 gas_slots: image.gas_slots.clone(),
350 quota_slots: image.quota_slots.clone(),
351 })
352}
353
354/// Copy `bytes` into a `Vec<u8>` whose backing allocation is
355/// page-aligned and page-size-rounded (so the kernel can `va_to_pa` +
356/// direct-map the code region RO), but whose **length is the real code
357/// length** — not the padded capacity.
358///
359/// The length must stay exact: the recompiler iterates `code.len()`
360/// bytes, so a page-padded length would make it compile thousands of
361/// trailing zero bytes as bogus instructions (a ~page-sized fixed cost
362/// per recompile that dominates small guests). The runtime rounds the
363/// mapping size up to a page separately; the trailing capacity bytes
364/// stay zeroed and mapped but are never executed.
365fn alloc_page_aligned_code(bytes: &[u8]) -> Vec<u8> {
366 let mut v = super::data::alloc_page_aligned_zeroed(bytes.len());
367 v[..bytes.len()].copy_from_slice(bytes);
368 // Keep the page-aligned allocation + zeroed tail (capacity), but
369 // expose only the real code length. `truncate` never reallocates,
370 // so the base pointer stays page-aligned for `va_to_pa`.
371 v.truncate(bytes.len());
372 v
373}
374
375fn build_image_slot_vec(pairs: &[(Key, CapHash)]) -> Vec<ImageSlotEntry> {
376 let mut sorted: Vec<(Key, CapHash)> = pairs.to_vec();
377 // `Key: Ord` is lexicographic-by-byte; canonical ordering keeps the
378 // `ImageCap` hash insertion-order independent.
379 sorted.sort_by(|(a, _), (b, _)| a.cmp(b));
380 let mut out = Vec::with_capacity(sorted.len());
381 for (slot, cap_hash) in sorted {
382 out.push(ImageSlotEntry { slot, cap_hash });
383 }
384 out
385}