Skip to main content

javm_cap/cap/
page.rs

//! `PageSlot` and `PageRef` — DataCap page storage.
//!
//! Each page is owned by the DataCap that holds it. Sharing across
//! DataCap CoW clones is done via [`PageRef`], a refcounted handle
//! over [`PageBytes`] backed by the global allocator. The cache
//! subsystem doesn't index pages by hash — pages aren't first-class
//! caps; they're internal to the DataCap layer.
8
9use alloc::sync::Arc;
10use alloc::vec::Vec;
11
12use super::CapHash;
13
14/// Sparse representation of a paged DataCap's pages. `Empty` is the
15/// canonical zero page; `Loaded` holds a refcounted byte slab;
16/// `Missing` records the page's content hash so a host callback can
17/// later resolve it (V1: never observed — we always pre-publish).
18#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
19pub enum PageSlot {
20    Empty,
21    Loaded(PageRef),
22    Missing(CapHash),
23}
24
/// Refcounted handle to a [`PageBytes`] allocated by the global
/// allocator. Plain `alloc::sync::Arc` alias for cap-layer readability.
27pub type PageRef = Arc<PageBytes>;
28
29/// One page's bytes plus its precomputed content hash.
30///
31/// Sharing across DataCap CoW clones is via [`PageRef`] (= `Arc`),
32/// which carries its own refcount — `PageBytes` itself is not
33/// refcounted.
34///
35/// `Clone` and rkyv `Deserialize` are **hand-written** to preserve the
36/// `PAGE_SIZE`-alignment invariant of `bytes`: the recompiler resolves a
37/// page's physical address from its slab pointer and direct-maps it into a
38/// ring-3 page table (`pt_map_leaf` requires a page-aligned PA). The derived
39/// `Clone` / `Deserialize` would `Vec`-allocate `bytes` at alignment 1, so a
40/// cloned or wire-decoded page would land mid-page and the recompiler would map
41/// the wrong physical frame. Both re-allocate through
42/// [`super::data::alloc_page_aligned_zeroed`]. (Mirrors the page-alignment
43/// discipline the legacy `DataContent::Inline` kept in its manual `Clone`.)
44#[derive(Debug, rkyv::Archive, rkyv::Serialize)]
45pub struct PageBytes {
46    pub hash: CapHash,
47    pub bytes: Vec<u8>,
48}
49
50impl Clone for PageBytes {
51    fn clone(&self) -> Self {
52        Self::realigned(self.hash, &self.bytes)
53    }
54}
55
56impl PageBytes {
57    /// Build a `PageBytes` with `bytes` re-allocated into a `PAGE_SIZE`-aligned
58    /// slab (zero-padded tail). Used by the page-aligning `Clone` / rkyv
59    /// `Deserialize`.
60    fn realigned(hash: CapHash, src: &[u8]) -> Self {
61        use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed};
62        let mut bytes = alloc_page_aligned_zeroed(src.len().max(PAGE_SIZE));
63        bytes[..src.len()].copy_from_slice(src);
64        Self { hash, bytes }
65    }
66}
67
68impl<D: rkyv::rancor::Fallible + ?Sized> rkyv::Deserialize<PageBytes, D> for ArchivedPageBytes {
69    fn deserialize(&self, _deserializer: &mut D) -> Result<PageBytes, D::Error> {
70        // Re-align into a `PAGE_SIZE` slab (load-bearing for the recompiler
71        // direct-map — see the `PageBytes` docs).
72        Ok(PageBytes::realigned(self.hash, self.bytes.as_slice()))
73    }
74}
75
76impl PageBytes {
77    /// Build a `PageBytes` from up to `PAGE_SIZE` content bytes: a
78    /// `PAGE_SIZE`-aligned slab (zero-padded tail) plus the precomputed
79    /// content hash ([`super::data::page_content_hash`]). The slab alignment is
80    /// load-bearing — the recompiler maps the page's slab directly into a
81    /// ring-3 page table.
82    pub fn from_content(content: &[u8]) -> Self {
83        use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed, page_content_hash};
84        let hash = page_content_hash(content);
85        let mut bytes = alloc_page_aligned_zeroed(PAGE_SIZE);
86        let n = content.len().min(PAGE_SIZE);
87        bytes[..n].copy_from_slice(&content[..n]);
88        Self { hash, bytes }
89    }
90
91    /// Build a `PageBytes` from up to `PAGE_SIZE` source bytes **without**
92    /// computing the content hash: a `PAGE_SIZE`-aligned slab (zero-padded
93    /// tail) carrying a `[0u8; 32]` sentinel `hash`.
94    ///
95    /// Used by the x86 recompiler's ring-0 copy-on-write page-fault handler,
96    /// which inserts the fresh page straight into a [`super::data::DataCap`]
97    /// overlay. Overlay pages are never hashed before
98    /// [`super::data::DataCap::flush`] (which rebuilds each page via
99    /// [`Self::from_content`], recomputing the real hash and re-asserting the
100    /// substitution invariant), so keeping SHA-256 out of the #PF path keeps
101    /// the per-page CoW gas charge identical between the interpreter and the
102    /// recompiler. The slab is still `PAGE_SIZE`-aligned — load-bearing for
103    /// the direct ring-3 PT map.
104    pub fn from_page_copy_unhashed(src: &[u8]) -> Self {
105        use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed};
106        let mut bytes = alloc_page_aligned_zeroed(PAGE_SIZE);
107        let n = src.len().min(PAGE_SIZE);
108        bytes[..n].copy_from_slice(&src[..n]);
109        debug_assert_eq!(
110            bytes.as_ptr() as usize % PAGE_SIZE,
111            0,
112            "from_page_copy_unhashed: slab must be PAGE_SIZE-aligned",
113        );
114        Self {
115            hash: [0u8; 32],
116            bytes,
117        }
118    }
119}
120
// --------------------------------------------------------------------------
// Hand-written SSZ impls for `PageSlot` and `PageBytes`.
//
// `HashTreeRoot` is deliberately not derived: the pass-through semantics
// are load-bearing for the substitution invariant. A `Loaded(page)` slot
// must hash identically to a `Missing(h)` slot when `h == page.hash`, and
// a `Loaded(page)` slot's root must equal `page.hash` (the precomputed
// page digest). A `derive(HashTreeRoot)` would mix in a selector byte and
// break that equality.
// --------------------------------------------------------------------------
132
133impl ssz::HashTreeRoot for PageSlot {
134    fn hash_tree_root<D: ::ssz::digest::Digest<OutputSize = ::ssz::digest::typenum::U32>>(
135        &self,
136    ) -> [u8; 32] {
137        match self {
138            // Canonical zero-page sentinel. Under SSZ, an empty page's
139            // root is the empty 32-byte chunk.
140            PageSlot::Empty => [0u8; 32],
141            PageSlot::Loaded(pr) => (**pr).hash_tree_root::<D>(),
142            PageSlot::Missing(h) => *h,
143        }
144    }
145}
146
147impl ssz::HashTreeRoot for PageBytes {
148    fn hash_tree_root<D: ::ssz::digest::Digest<OutputSize = ::ssz::digest::typenum::U32>>(
149        &self,
150    ) -> [u8; 32] {
151        // `self.hash` is the precomputed page-content identity (kept
152        // consistent with `bytes` by `cache.rs`). Returning it directly
153        // preserves substitution: a `Loaded(page)` slot is
154        // indistinguishable from `Missing(page.hash)` at the SSZ
155        // merkleization level.
156        self.hash
157    }
158}
159
160impl ssz::Encode for PageSlot {
161    fn is_ssz_fixed_len() -> bool {
162        false
163    }
164    fn ssz_fixed_len() -> usize {
165        ssz::BYTES_PER_LENGTH_OFFSET
166    }
167    fn ssz_bytes_len(&self) -> usize {
168        match self {
169            PageSlot::Empty => 1,
170            PageSlot::Loaded(pr) => 1 + (**pr).ssz_bytes_len(),
171            PageSlot::Missing(_) => 1 + 32,
172        }
173    }
174    fn ssz_append(&self, buf: &mut Vec<u8>) {
175        match self {
176            PageSlot::Empty => buf.push(0),
177            PageSlot::Loaded(pr) => {
178                buf.push(1);
179                (**pr).ssz_append(buf);
180            }
181            PageSlot::Missing(h) => {
182                buf.push(2);
183                buf.extend_from_slice(h);
184            }
185        }
186    }
187}
188
189impl ssz::Encode for PageBytes {
190    fn is_ssz_fixed_len() -> bool {
191        false
192    }
193    fn ssz_fixed_len() -> usize {
194        ssz::BYTES_PER_LENGTH_OFFSET
195    }
196    fn ssz_bytes_len(&self) -> usize {
197        // SSZ container with one fixed (hash) and one variable (bytes):
198        // fixed-region = 32 (hash) + 4 (offset slot) = 36; variable
199        // payload = bytes.len().
200        32 + 4 + self.bytes.len()
201    }
202    fn ssz_append(&self, buf: &mut Vec<u8>) {
203        // Field 0: hash (fixed, 32 bytes).
204        // Field 1: bytes (variable, offset slot + payload).
205        let fixed_region = 32 + 4;
206        buf.extend_from_slice(&self.hash);
207        // Offset to the variable payload = fixed_region size.
208        buf.extend_from_slice(&(fixed_region as u32).to_le_bytes());
209        buf.extend_from_slice(self.bytes.as_slice());
210    }
211}