Skip to main content

javm_cap/cap/
image.rs

1//! `ImageCap` — Image cap.
2//!
3//! Stores a single code region, endpoints, mappings, and slot references as
4//! separate `Vec<T>` allocations. Allocation count per ImageCap is
5//! bounded regardless of content size; we accept that in exchange for
6//! direct field accessors.
7
8use alloc::vec::Vec;
9
10use crate::slot::{Key, SlotPath};
11
12use super::{CapHash, MAX_SOURCE_DEPTH, NUM_REGS};
13
/// The cap-resident form of an image: one code region plus the endpoint,
/// memory-mapping and slot metadata that frames its execution. Built from
/// a [`crate::image::Image`] by [`image_cap`].
///
/// `Clone` is implemented by hand (not derived) so that the `code` buffer
/// of a clone is re-allocated page-aligned.
///
/// # Validation model: structure is eager, semantics are lazy
///
/// An `ImageCap` is admitted from untrusted input under a two-layer
/// discipline:
///
/// - **Structure — validated eagerly** (here / in [`image_cap`], the
///   "deblob"). The metadata that frames execution: `code` *length*
///   (`≤ MAX_CODE_SIZE`), memory-mapping bounds, slot indices, source-path
///   depth, endpoint indices. A malformed structural field has no clean
///   execution point to fault on — it would diverge between engines or
///   panic the host — so it is rejected at construction. This is cheap
///   (`O(#endpoints + #mappings + #slots)`, it never scans the code) and
///   therefore compatible with lazy compilation.
///
/// - **Semantics — validated lazily** (at execution, by both engines
///   identically). The instruction stream itself: illegal/forbidden
///   encodings, and `jal`/branch/`jalr`/`entry_pc` targets. These are
///   **not** rejected at admission — any `code` bytes are accepted. A
///   forbidden encoding decodes as illegal and an off-`bb_start` target is
///   refused only *when reached*, as `ε = panic`. Lazy (not eager
///   deblob) because, without an instruction bitmask, a linear validator
///   can't tell code from data — eager rejection would reject legitimate
///   code-as-data; lazy also keeps admission version-independent (a future
///   ISA extension forks only at execution, never the cap set at
///   admission) and preserves lazy compilation. The consensus requirement
///   is that the two engines *agree* on what panics, not that the bytes
///   are pre-screened. The producer toolchain still rejects forbidden
///   encodings at build time as a diagnostic — that is UX, not a
///   consensus rule.
#[derive(Debug, ssz_derive::HashTreeRoot, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub struct ImageCap {
    /// The (single) code region: raw RV+C+custom-0 bytes, page-aligned
    /// so the kernel can direct-map it RO at the fixed protocol
    /// constant [`crate::layout::CODE_BASE`]. Empty for codeless
    /// images. See [`ImageCap::code_mapping`].
    pub code: Vec<u8>,
    /// Endpoint definitions, keyed by a [`Key`] selector. A sparse, sorted
    /// association list (`Dict`-style — kept sorted by key, no fixed capacity);
    /// an absent key is an undefined endpoint. There is no dense array and no
    /// `entry_pc == 0` sentinel, so an endpoint may legitimately start at code
    /// offset 0. (`Vec<(Key, _)>` rather than `BTreeMap` because the rkyv wire
    /// form has no `Ord` on the archived key.)
    pub endpoints: Vec<(Key, EndpointDef)>,
    /// Memory mappings. [`image_cap`] rejects any mapping whose `source`
    /// path is empty or deeper than `MAX_SOURCE_DEPTH`.
    pub mappings: Vec<MemoryMapping>,
    /// Pinned read-only slots (Cap::Data / Cap::Image). Images only
    /// ever reference content-addressed caps, so the target is a
    /// plain `CapHash`. [`image_cap`] stores these sorted by slot key.
    pub pinned: Vec<ImageSlotEntry>,
    /// Initial mutable slot state for non-pinned slots. [`image_cap`]
    /// stores these sorted by slot key.
    pub initial: Vec<ImageSlotEntry>,
    /// Slot holding `Cap::Instance[YieldReceiver]` (the catch-set), if any.
    pub yield_receiver_slot: Option<Key>,
    /// Cnode slots holding the `Cap::Instance[Gas{meter_key}]` unit handles,
    /// consulted in order. See [`crate::image::Image::gas_slots`].
    pub gas_slots: Vec<Key>,
    /// Cnode slots holding the `Cap::Instance[Quota{quota_key}]` unit handles.
    pub quota_slots: Vec<Key>,
}
73
74// Manual Clone: the derived impl would `Vec::clone` the `code` bytes,
75// which goes through `Global::alloc` at the default 1-byte alignment
76// for `[u8]`. The kernel direct-maps the code region into a ring-3 PT
77// and asserts `phys.is_multiple_of(PAGE_SIZE)`, so a cloned buffer on
78// an unaligned page would panic. Re-allocate `code` through
79// `alloc_page_aligned_code` to preserve the invariant across clones
80// (mirrors `DataContent`'s manual Clone). Other fields clone normally.
81impl Clone for ImageCap {
82    fn clone(&self) -> Self {
83        Self {
84            code: alloc_page_aligned_code(&self.code),
85            endpoints: self.endpoints.clone(),
86            mappings: self.mappings.clone(),
87            pinned: self.pinned.clone(),
88            initial: self.initial.clone(),
89            yield_receiver_slot: self.yield_receiver_slot.clone(),
90            gas_slots: self.gas_slots.clone(),
91            quota_slots: self.quota_slots.clone(),
92        }
93    }
94}
95
96impl ImageCap {
97    /// The executable code region as `(code_base, bytes)`. `code_base`
98    /// is the fixed protocol constant [`crate::layout::CODE_BASE`], so a
99    /// PVM PC is `code_base + byte_offset`. `None` if the image declares
100    /// no code (empty region) — such an image cannot execute.
101    pub fn code_mapping(&self) -> Option<(u32, &[u8])> {
102        if self.code.is_empty() {
103            return None;
104        }
105        Some((crate::layout::CODE_BASE, self.code.as_slice()))
106    }
107
108    /// True iff the memory mapping starting at guest VA `start` draws
109    /// from a pinned (read-only) slot, so it must be laid read-only — a
110    /// guest store to it faults. Mirrors the recompiler's pinned-vs-
111    /// initial slot classification (`nub-arch-x86` `build_runtime`).
112    /// Derived from [`Self::pinned`] at lay time, so a mapping carries no
113    /// per-mapping permission field; the interpreter drivers (`javm`
114    /// `build_entry`, `nub-arch-local`) call this so they classify
115    /// identically to the recompiler.
116    pub fn mapping_is_pinned(&self, start: u32) -> bool {
117        self.mappings.iter().any(|m| {
118            m.start as u32 == start
119                && m.source
120                    .steps()
121                    .first()
122                    .is_some_and(|root| self.pinned.iter().any(|p| &p.slot == root))
123        })
124    }
125}
126
/// Endpoint definition. Dense `initial_regs` array; index `i`
/// corresponds to PVM register `φ[i]`. `0` is "use default" (same
/// semantics as the spec's old `BTreeMap<u8, u64>` when the key is
/// absent).
// `Key` is heap-spillable (`SmallVec`), so `EndpointDef` is no longer
// `Copy`; it threads through the cap layer by value/clone like the other
// `Key`-bearing structs.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct EndpointDef {
    /// Entry program counter. Carried through unchecked at admission;
    /// per the validation model on [`ImageCap`], a bad target is refused
    /// only when reached.
    pub entry_pc: u64,
    /// Initial stack top. [`image_cap`] fills this from the source
    /// endpoint's `initial_regs[1]`, or `0` when that register is unset.
    pub stack_top: u64,
    /// Cnode slot for the endpoint's arguments. [`image_cap`] always sets
    /// this to `Key::from(0u8)`.
    pub arg_cnode_slot: Key,
    /// Size of the argument cnode, copied from the source endpoint.
    pub arg_cnode_size: u8,
    /// Initial register values, one entry per PVM register.
    pub initial_regs: [u64; NUM_REGS],
}
153
/// One mapped region. The kernel resolves `source` (a [`SlotPath`] to a
/// `Cap::Data`) at instance start, reads the bytes, and lays them at
/// `[start, start + size)`.
///
/// `source` is a variable-length [`SlotPath`] (was a fixed `[SlotIdx;
/// MAX_SOURCE_DEPTH]` + length), so `MemoryMapping` is now a
/// variable-length SSZ container with a fully derived codec — no hand-rolled
/// SSZ. The eager depth bound (`≤ MAX_SOURCE_DEPTH`) is enforced in
/// [`image_cap`] at deblob, not in the wire decode.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct MemoryMapping {
    /// First address of the region (inclusive lower bound of
    /// `[start, start + size)`).
    pub start: u64,
    /// Length of the region; `start + size` is its exclusive upper bound.
    pub size: u64,
    /// Cnode path resolving to the `Cap::Data` whose bytes back this region.
    pub source: SlotPath,
}
181
182impl MemoryMapping {
183    /// The cnode path steps — the keys to walk to the `Cap::Data` backing
184    /// this mapping. Non-empty for a well-formed mapping.
185    pub fn path(&self) -> &[Key] {
186        self.source.steps()
187    }
188}
189
/// `(slot_key, cap_hash)` pair used by Image's `pinned` and
/// `initial` arrays. References content-addressed caps only.
///
/// `Key` is heap-spillable, so this is no longer `Copy`.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    ssz_derive::Encode,
    ssz_derive::Decode,
    ssz_derive::HashTreeRoot,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct ImageSlotEntry {
    /// Key of the slot this entry populates.
    pub slot: Key,
    /// Hash of the content-addressed cap placed in `slot`.
    pub cap_hash: CapHash,
}
210
/// Failure modes when converting an SSZ-encoded [`crate::image::Image`]
/// into an [`ImageCap`] via [`image_cap`].
///
/// The conversion keeps the slots and metadata that remain cap-resident
/// and drops only the per-endpoint `arg_registers`. Each variant reports
/// one structural constraint that the input violated.
#[derive(Debug, thiserror::Error)]
pub enum ImageConvertError {
    /// The declared code length (first field) is greater than
    /// `MAX_CODE_SIZE` (second field).
    #[error("code region {0} bytes exceeds MAX_CODE_SIZE ({1})")]
    CodeTooLarge(usize, u32),
    /// The code window does not fit inside the arena. Fields are the
    /// window's arena offset, its length, and the arena length.
    #[error("code ref [{0}, {0}+{1}) out of arena bounds (arena {2} bytes)")]
    CodeRefOutOfRange(u32, u32, usize),
    /// A pinned or initial data descriptor failed its `validate` check
    /// against the arena length.
    #[error("data desc invalid: {0:?}")]
    DataDesc(crate::image::DataDescError),
    /// A memory mapping's source path has no steps.
    #[error("memory mapping source path empty")]
    SourcePathEmpty,
    /// A memory mapping's source path has more than `MAX_SOURCE_DEPTH`
    /// steps; the field is the actual step count.
    #[error("memory mapping source path too deep (steps={0} > MAX_SOURCE_DEPTH)")]
    SourcePathTooDeep(usize),
    /// An endpoint's `initial_regs` names a register index that is not
    /// below `NUM_REGS`; the field is the offending index.
    #[error("register index {0} >= NUM_REGS")]
    RegisterIndexOutOfRange(u8),
}
230
231/// Build an [`ImageCap`] from the SSZ-encoded [`crate::image::Image`]
232/// shape. The Data content referenced by pinned and initial slots must
233/// already be published — pass the resolved `(SlotIdx, CapHash)` pairs
234/// in `pinned_hashes` and `initial_hashes`. The builder sorts both lists
235/// by slot index.
236///
237/// **Lossy fields (intentionally dropped):**
238/// - per-endpoint `arg_registers`: the calling convention is implicit
239///   in the new shape. `gas_slots` and `quota_slots` remain image
240///   metadata and are carried through unchanged.
241///
242/// **Field mappings:**
243/// - Endpoints are stored in a sparse `Key -> EndpointDef` map (no fixed
244///   capacity). `stack_top` is extracted from the old `initial_regs[1]`
245///   (RISC-V SP convention); `arg_cnode_slot` defaults to `Key::from(0)`.
246/// - `MemoryMapping.source` (a [`SlotPath`]) is carried through verbatim;
247///   paths that are empty or deeper than `MAX_SOURCE_DEPTH` error.
248pub fn image_cap(
249    image: &crate::image::Image,
250    pinned_hashes: &[(Key, CapHash)],
251    initial_hashes: &[(Key, CapHash)],
252) -> Result<ImageCap, ImageConvertError> {
253    // Structural invariant (eager): the code region maps RO at
254    // `[CODE_BASE, DATA_BASE)`, so it must fit under `MAX_CODE_SIZE` —
255    // otherwise a high code offset would alias the data region. The
256    // *contents* of `code` are not validated here (instruction legality
257    // is checked lazily, at execution); only its size is a structural
258    // bound. Checked before the page-aligned copy so an oversized blob
259    // is rejected without allocating it.
260    let code_len = image.code.len as usize;
261    if code_len > crate::layout::MAX_CODE_SIZE as usize {
262        return Err(ImageConvertError::CodeTooLarge(
263            code_len,
264            crate::layout::MAX_CODE_SIZE,
265        ));
266    }
267    // The code window `[arena_off, arena_off + len)` must lie within the
268    // arena (untrusted wire input — fail loud, never slice out of range).
269    let code_in_bounds = (image.code.arena_off as usize)
270        .checked_add(code_len)
271        .is_some_and(|end| end <= image.arena.len());
272    if !code_in_bounds {
273        return Err(ImageConvertError::CodeRefOutOfRange(
274            image.code.arena_off,
275            image.code.len,
276            image.arena.len(),
277        ));
278    }
279    // Every pinned/initial data descriptor must reference the arena
280    // soundly (page-aligned, in-bounds, page_index < page_count, canonical
281    // page order) before any downstream materialization slices the arena.
282    for slot in image.pinned_slots.values() {
283        if let crate::image::PinnedCap::Data { desc } = slot {
284            desc.validate(image.arena.len())
285                .map_err(ImageConvertError::DataDesc)?;
286        }
287    }
288    for desc in image.initial_slots.values() {
289        desc.validate(image.arena.len())
290            .map_err(ImageConvertError::DataDesc)?;
291    }
292    // Code: page-aligned copy so the kernel can direct-map it RO at
293    // `layout::CODE_BASE`.
294    let code = alloc_page_aligned_code(image.code_bytes());
295
296    // Endpoints: a sparse, sorted `Key -> EndpointDef` association list (no
297    // fixed capacity, no dense `entry_pc == 0` sentinel — presence is what
298    // defines an endpoint). `image.endpoints` is a BTreeMap, so iterating it
299    // yields keys in sorted order and the resulting Vec stays sorted by Key.
300    let mut endpoints = Vec::with_capacity(image.endpoints.len());
301    for (key, ep) in &image.endpoints {
302        let mut initial_regs = [0u64; NUM_REGS];
303        for (&reg_idx, &val) in &ep.initial_regs {
304            if (reg_idx as usize) >= NUM_REGS {
305                return Err(ImageConvertError::RegisterIndexOutOfRange(reg_idx));
306            }
307            initial_regs[reg_idx as usize] = val;
308        }
309        // RISC-V SP convention: φ[1] = stack pointer.
310        let stack_top = ep.initial_regs.get(&1).copied().unwrap_or(0);
311        endpoints.push((
312            key.clone(),
313            EndpointDef {
314                entry_pc: ep.entry_pc,
315                stack_top,
316                arg_cnode_slot: Key::from(0u8),
317                arg_cnode_size: ep.arg_cnode_size,
318                initial_regs,
319            },
320        ));
321    }
322
323    let mut mappings = Vec::with_capacity(image.memory_mappings.len());
324    for m in &image.memory_mappings {
325        let steps = m.source.steps();
326        if steps.is_empty() {
327            return Err(ImageConvertError::SourcePathEmpty);
328        }
329        if steps.len() > MAX_SOURCE_DEPTH {
330            return Err(ImageConvertError::SourcePathTooDeep(steps.len()));
331        }
332        mappings.push(MemoryMapping {
333            start: m.start,
334            size: m.size,
335            source: m.source.clone(),
336        });
337    }
338
339    let pinned = build_image_slot_vec(pinned_hashes);
340    let initial = build_image_slot_vec(initial_hashes);
341
342    Ok(ImageCap {
343        code,
344        endpoints,
345        mappings,
346        pinned,
347        initial,
348        yield_receiver_slot: image.yield_receiver_slot.clone(),
349        gas_slots: image.gas_slots.clone(),
350        quota_slots: image.quota_slots.clone(),
351    })
352}
353
354/// Copy `bytes` into a `Vec<u8>` whose backing allocation is
355/// page-aligned and page-size-rounded (so the kernel can `va_to_pa` +
356/// direct-map the code region RO), but whose **length is the real code
357/// length** — not the padded capacity.
358///
359/// The length must stay exact: the recompiler iterates `code.len()`
360/// bytes, so a page-padded length would make it compile thousands of
361/// trailing zero bytes as bogus instructions (a ~page-sized fixed cost
362/// per recompile that dominates small guests). The runtime rounds the
363/// mapping size up to a page separately; the trailing capacity bytes
364/// stay zeroed and mapped but are never executed.
365fn alloc_page_aligned_code(bytes: &[u8]) -> Vec<u8> {
366    let mut v = super::data::alloc_page_aligned_zeroed(bytes.len());
367    v[..bytes.len()].copy_from_slice(bytes);
368    // Keep the page-aligned allocation + zeroed tail (capacity), but
369    // expose only the real code length. `truncate` never reallocates,
370    // so the base pointer stays page-aligned for `va_to_pa`.
371    v.truncate(bytes.len());
372    v
373}
374
375fn build_image_slot_vec(pairs: &[(Key, CapHash)]) -> Vec<ImageSlotEntry> {
376    let mut sorted: Vec<(Key, CapHash)> = pairs.to_vec();
377    // `Key: Ord` is lexicographic-by-byte; canonical ordering keeps the
378    // `ImageCap` hash insertion-order independent.
379    sorted.sort_by(|(a, _), (b, _)| a.cmp(b));
380    let mut out = Vec::with_capacity(sorted.len());
381    for (slot, cap_hash) in sorted {
382        out.push(ImageSlotEntry { slot, cap_hash });
383    }
384    out
385}