javm_cap/cap/page.rs
1//! `PageSlot` and `PageRef` — DataCap page storage.
2//!
3//! Each page is owned by the DataCap that holds it. Sharing across
4//! DataCap CoW clones is done via [`PageRef`], a refcounted handle
5//! over [`PageBytes`] backed by the global allocator. The cache
6//! subsystem doesn't index pages by hash — pages aren't first-class
7//! caps. They're internal to the DataCap layer.
8
9use alloc::sync::Arc;
10use alloc::vec::Vec;
11
12use super::CapHash;
13
/// Sparse representation of a paged DataCap's pages. `Empty` is the
/// canonical zero page; `Loaded` holds a refcounted byte slab;
/// `Missing` records the page's content hash so a host callback can
/// later resolve it (V1: never observed — we always pre-publish).
///
/// The variant order is part of the derived rkyv layout and matches the
/// selector bytes (0, 1, 2) written by the hand-written SSZ `Encode` impl
/// in this file.
#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub enum PageSlot {
    /// The canonical zero page; carries no payload and merkleizes to the
    /// all-zero 32-byte chunk.
    Empty,
    /// A resident page, shared through a refcounted [`PageRef`].
    Loaded(PageRef),
    /// A non-resident page, identified only by its content hash.
    Missing(CapHash),
}
24
/// Refcounted handle to a [`PageBytes`] allocated by the global
/// allocator. Plain `alloc::sync::Arc` alias (this file imports `Arc` from
/// `alloc`, not `std`), named for cap-layer readability.
pub type PageRef = Arc<PageBytes>;
28
/// One page's bytes plus its precomputed content hash.
///
/// Sharing across DataCap CoW clones is via [`PageRef`] (= `Arc`),
/// which carries its own refcount — `PageBytes` itself is not
/// refcounted.
///
/// `Clone` and rkyv `Deserialize` are **hand-written** to preserve the
/// `PAGE_SIZE`-alignment invariant of `bytes`: the recompiler resolves a
/// page's physical address from its slab pointer and direct-maps it into a
/// ring-3 page table (`pt_map_leaf` requires a page-aligned PA). The derived
/// `Clone` / `Deserialize` would `Vec`-allocate `bytes` at alignment 1, so a
/// cloned or wire-decoded page would land mid-page and the recompiler would map
/// the wrong physical frame. Both re-allocate through
/// [`super::data::alloc_page_aligned_zeroed`]. (Mirrors the page-alignment
/// discipline the legacy `DataContent::Inline` kept in its manual `Clone`.)
#[derive(Debug, rkyv::Archive, rkyv::Serialize)]
pub struct PageBytes {
    /// Content hash of the page. `from_content` computes it via
    /// `page_content_hash`; `from_page_copy_unhashed` stores an all-zero
    /// sentinel instead. Returned verbatim as the page's SSZ hash-tree root.
    pub hash: CapHash,
    /// Page payload. The constructors and the hand-written `Clone` /
    /// `Deserialize` in this file all allocate it through
    /// `alloc_page_aligned_zeroed`, zero-padded to at least `PAGE_SIZE` bytes.
    pub bytes: Vec<u8>,
}
49
50impl Clone for PageBytes {
51 fn clone(&self) -> Self {
52 Self::realigned(self.hash, &self.bytes)
53 }
54}
55
56impl PageBytes {
57 /// Build a `PageBytes` with `bytes` re-allocated into a `PAGE_SIZE`-aligned
58 /// slab (zero-padded tail). Used by the page-aligning `Clone` / rkyv
59 /// `Deserialize`.
60 fn realigned(hash: CapHash, src: &[u8]) -> Self {
61 use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed};
62 let mut bytes = alloc_page_aligned_zeroed(src.len().max(PAGE_SIZE));
63 bytes[..src.len()].copy_from_slice(src);
64 Self { hash, bytes }
65 }
66}
67
68impl<D: rkyv::rancor::Fallible + ?Sized> rkyv::Deserialize<PageBytes, D> for ArchivedPageBytes {
69 fn deserialize(&self, _deserializer: &mut D) -> Result<PageBytes, D::Error> {
70 // Re-align into a `PAGE_SIZE` slab (load-bearing for the recompiler
71 // direct-map — see the `PageBytes` docs).
72 Ok(PageBytes::realigned(self.hash, self.bytes.as_slice()))
73 }
74}
75
76impl PageBytes {
77 /// Build a `PageBytes` from up to `PAGE_SIZE` content bytes: a
78 /// `PAGE_SIZE`-aligned slab (zero-padded tail) plus the precomputed
79 /// content hash ([`super::data::page_content_hash`]). The slab alignment is
80 /// load-bearing — the recompiler maps the page's slab directly into a
81 /// ring-3 page table.
82 pub fn from_content(content: &[u8]) -> Self {
83 use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed, page_content_hash};
84 let hash = page_content_hash(content);
85 let mut bytes = alloc_page_aligned_zeroed(PAGE_SIZE);
86 let n = content.len().min(PAGE_SIZE);
87 bytes[..n].copy_from_slice(&content[..n]);
88 Self { hash, bytes }
89 }
90
91 /// Build a `PageBytes` from up to `PAGE_SIZE` source bytes **without**
92 /// computing the content hash: a `PAGE_SIZE`-aligned slab (zero-padded
93 /// tail) carrying a `[0u8; 32]` sentinel `hash`.
94 ///
95 /// Used by the x86 recompiler's ring-0 copy-on-write page-fault handler,
96 /// which inserts the fresh page straight into a [`super::data::DataCap`]
97 /// overlay. Overlay pages are never hashed before
98 /// [`super::data::DataCap::flush`] (which rebuilds each page via
99 /// [`Self::from_content`], recomputing the real hash and re-asserting the
100 /// substitution invariant), so keeping SHA-256 out of the #PF path keeps
101 /// the per-page CoW gas charge identical between the interpreter and the
102 /// recompiler. The slab is still `PAGE_SIZE`-aligned — load-bearing for
103 /// the direct ring-3 PT map.
104 pub fn from_page_copy_unhashed(src: &[u8]) -> Self {
105 use super::data::{PAGE_SIZE, alloc_page_aligned_zeroed};
106 let mut bytes = alloc_page_aligned_zeroed(PAGE_SIZE);
107 let n = src.len().min(PAGE_SIZE);
108 bytes[..n].copy_from_slice(&src[..n]);
109 debug_assert_eq!(
110 bytes.as_ptr() as usize % PAGE_SIZE,
111 0,
112 "from_page_copy_unhashed: slab must be PAGE_SIZE-aligned",
113 );
114 Self {
115 hash: [0u8; 32],
116 bytes,
117 }
118 }
119}
120
121// --------------------------------------------------------------------------
122// Hand-written SSZ impls for `PageSlot` and `PageBytes`.
123//
124// `HashTreeRoot` is deliberately not derived: the pass-through semantics
125// are load-bearing for the substitution invariant. A `Loaded(page)` slot
126// must hash identically to a `Missing(h)` slot when `h == page.hash`, and
127// a `Loaded(page)` slot's root must equal `page.hash` (the precomputed
128// page digest). A `derive(HashTreeRoot)` would mix in a selector byte and
129// break that equality.
130//
131// --------------------------------------------------------------------------
132
133impl ssz::HashTreeRoot for PageSlot {
134 fn hash_tree_root<D: ::ssz::digest::Digest<OutputSize = ::ssz::digest::typenum::U32>>(
135 &self,
136 ) -> [u8; 32] {
137 match self {
138 // Canonical zero-page sentinel. Under SSZ, an empty page's
139 // root is the empty 32-byte chunk.
140 PageSlot::Empty => [0u8; 32],
141 PageSlot::Loaded(pr) => (**pr).hash_tree_root::<D>(),
142 PageSlot::Missing(h) => *h,
143 }
144 }
145}
146
147impl ssz::HashTreeRoot for PageBytes {
148 fn hash_tree_root<D: ::ssz::digest::Digest<OutputSize = ::ssz::digest::typenum::U32>>(
149 &self,
150 ) -> [u8; 32] {
151 // `self.hash` is the precomputed page-content identity (kept
152 // consistent with `bytes` by `cache.rs`). Returning it directly
153 // preserves substitution: a `Loaded(page)` slot is
154 // indistinguishable from `Missing(page.hash)` at the SSZ
155 // merkleization level.
156 self.hash
157 }
158}
159
160impl ssz::Encode for PageSlot {
161 fn is_ssz_fixed_len() -> bool {
162 false
163 }
164 fn ssz_fixed_len() -> usize {
165 ssz::BYTES_PER_LENGTH_OFFSET
166 }
167 fn ssz_bytes_len(&self) -> usize {
168 match self {
169 PageSlot::Empty => 1,
170 PageSlot::Loaded(pr) => 1 + (**pr).ssz_bytes_len(),
171 PageSlot::Missing(_) => 1 + 32,
172 }
173 }
174 fn ssz_append(&self, buf: &mut Vec<u8>) {
175 match self {
176 PageSlot::Empty => buf.push(0),
177 PageSlot::Loaded(pr) => {
178 buf.push(1);
179 (**pr).ssz_append(buf);
180 }
181 PageSlot::Missing(h) => {
182 buf.push(2);
183 buf.extend_from_slice(h);
184 }
185 }
186 }
187}
188
189impl ssz::Encode for PageBytes {
190 fn is_ssz_fixed_len() -> bool {
191 false
192 }
193 fn ssz_fixed_len() -> usize {
194 ssz::BYTES_PER_LENGTH_OFFSET
195 }
196 fn ssz_bytes_len(&self) -> usize {
197 // SSZ container with one fixed (hash) and one variable (bytes):
198 // fixed-region = 32 (hash) + 4 (offset slot) = 36; variable
199 // payload = bytes.len().
200 32 + 4 + self.bytes.len()
201 }
202 fn ssz_append(&self, buf: &mut Vec<u8>) {
203 // Field 0: hash (fixed, 32 bytes).
204 // Field 1: bytes (variable, offset slot + payload).
205 let fixed_region = 32 + 4;
206 buf.extend_from_slice(&self.hash);
207 // Offset to the variable payload = fixed_region size.
208 buf.extend_from_slice(&(fixed_region as u32).to_le_bytes());
209 buf.extend_from_slice(self.bytes.as_slice());
210 }
211}