javm_fuzz/replay.rs
1//! Dual-engine replay: run a [`Program`] through the interpreter and the x86
2//! recompiler and compare. Gated to linux/x86_64 (the recompiler needs the
3//! Hyperlight host stack, so the whole `javm-bench` crate is gated to it).
4//!
5//! Both engines are driven through the *same* `javm-bench` `BuiltCaps` →
6//! `invoke_cached` path, so they receive byte-identical caps/initial-state —
7//! any divergence is a real engine disagreement, never a setup skew.
8
9use crate::{Program, SIG_BASE, encode};
10use javm_bench::{BuiltCaps, RawRun, run_interpreter_raw, run_recompiler_raw};
11use javm_cap::abi::SCRATCHPAD_SLOT;
12use javm_cap::image::{EndpointDef, Image, ImageBuilder, MemoryMapping};
13use javm_cap::slot::{Key, SlotPath};
14use std::collections::BTreeMap;
15
/// Cnode slot the fuzz memory window's backing data cap occupies.
///
/// The pinned read-only caps built by `image_with_ro` and
/// `image_with_ro_caps` are numbered upward starting at `WINDOW_SLOT + 1`.
const WINDOW_SLOT: u32 = 1;
18
/// One page — the scratchpad (`slot[0]`) signature region size (the program's
/// signature epilogue stores the `SIG_BYTES`-byte register file into its head).
///
/// `add_signature_region` uses this value both as the mapping size and as the
/// size of the initial data slot backing the mapping.
const SIG_REGION_BYTES: u64 = 4096;
22
23/// Map the scratchpad (`slot[0]`) signature region at [`SIG_BASE`] into the
24/// builder: an empty-content initial data slot of one page. The guest's
25/// signature epilogue CoWs this region during the run; both engines surface its
26/// effective bytes as the run's register signature.
27fn add_signature_region(b: ImageBuilder) -> ImageBuilder {
28 let slot = Key::from(SCRATCHPAD_SLOT);
29 b.mapping(MemoryMapping {
30 start: SIG_BASE as u64,
31 size: SIG_REGION_BYTES,
32 source: SlotPath::root(slot.clone()),
33 })
34 .initial_data(slot, Vec::new(), SIG_REGION_BYTES)
35}
36
37/// Build an `Image` from raw instruction `words` (+ `ecalli 0` terminator)
38/// with a **pinned read-only** data cap of `ro_bytes` mapped at `ro_start`
39/// — for category-#3 read-only-cluster differential tests. Both engines
40/// materialize it `PinnedCapRo` (interp perm-RO, recompiler MatRange) and
41/// charge it per 2 MiB cluster.
42pub fn image_with_ro(words: &[u32], ro_start: u32, ro_bytes: &[u8]) -> Image {
43 let mut code = encode::enc(words);
44 code.extend_from_slice(&encode::enc(&[encode::HALT]));
45 let slot = Key::from((WINDOW_SLOT + 1) as u8);
46 ImageBuilder::new()
47 .code(code)
48 .endpoint(
49 Key::from(0u8),
50 EndpointDef {
51 entry_pc: 0,
52 arg_registers: 0,
53 arg_cnode_size: 0,
54 initial_regs: BTreeMap::new(),
55 },
56 )
57 .mapping(MemoryMapping {
58 start: ro_start as u64,
59 size: ro_bytes.len() as u64,
60 source: SlotPath::root(slot.clone()),
61 })
62 .pinned_data(slot, ro_bytes.to_vec(), ro_bytes.len() as u64)
63 .build()
64}
65
66/// Build an `Image` from raw `words` with **several** pinned read-only data
67/// caps, each `(start, bytes)` — for multi-cap read-only-cluster differential
68/// tests (e.g. two distinct caps sharing one 2 MiB cluster). Each cap takes its
69/// own cnode slot, so the recompiler resolves each as a separate `PinnedCapRo`
70/// `MatRange` with its own source PA, exactly as production does.
71pub fn image_with_ro_caps(words: &[u32], caps: &[(u32, &[u8])]) -> Image {
72 let mut code = encode::enc(words);
73 code.extend_from_slice(&encode::enc(&[encode::HALT]));
74 let mut b = ImageBuilder::new().code(code).endpoint(
75 Key::from(0u8),
76 EndpointDef {
77 entry_pc: 0,
78 arg_registers: 0,
79 arg_cnode_size: 0,
80 initial_regs: BTreeMap::new(),
81 },
82 );
83 for (i, (start, bytes)) in caps.iter().enumerate() {
84 let slot = Key::from((WINDOW_SLOT + 1 + i as u32) as u8);
85 b = b
86 .mapping(MemoryMapping {
87 start: *start as u64,
88 size: bytes.len() as u64,
89 source: SlotPath::root(slot.clone()),
90 })
91 .pinned_data(slot, bytes.to_vec(), bytes.len() as u64);
92 }
93 b.build()
94}
95
96/// Run a pre-built `Image` through both engines and compare.
97pub fn diff_image(img: &Image) -> Diff {
98 let built = BuiltCaps::for_image(img, 0);
99 let interp = run_interpreter_raw(&built);
100 let recomp = run_recompiler_raw(&built);
101 Diff { interp, recomp }
102}
103
104/// Build the `Image` for a program: its code (body + signature epilogue) plus
105/// the appended `ecalli 0` terminator, entered at pc 0 with the program's
106/// initial register seed.
107///
108/// The Image always maps the scratchpad (`slot[0]`) signature region at
109/// [`SIG_BASE`] (via `add_signature_region`); the program's signature epilogue
110/// stores its register file there, and both engines surface the region's
111/// effective bytes for the lossless differential.
112///
113/// When the program declares an `init_mem` window, the Image declares a
114/// matching RW data mapping so **both** engines size their data extent to
115/// cover it and lazily materialize (category #3) the same pages. The window is
116/// backed by an *empty* initial slot (zero-filled, page-aligned `mem_buf`), so
117/// both engines treat it as ephemeral — the lazy-materialization charge is
118/// identical regardless, and this keeps the differential off the cap-PA
119/// page-in path (whose alignment is a separate concern).
120pub fn image_for(prog: &Program) -> Image {
121 let mut code = prog.code_bytes();
122 code.extend_from_slice(&encode::enc(&[encode::HALT]));
123
124 let mut b = ImageBuilder::new().code(code).endpoint(
125 Key::from(0u8),
126 EndpointDef {
127 entry_pc: 0,
128 arg_registers: 0,
129 arg_cnode_size: 0,
130 initial_regs: prog.init_regs.clone(),
131 },
132 );
133 b = add_signature_region(b);
134 if let Some(mem) = &prog.init_mem {
135 let slot = Key::from((WINDOW_SLOT) as u8);
136 // Empty content → no overlay; the mapping only sizes the data extent,
137 // and the window materializes as ephemeral zero pages on both engines.
138 b = b
139 .mapping(MemoryMapping {
140 start: mem.start as u64,
141 size: mem.bytes.len() as u64,
142 source: SlotPath::root(slot.clone()),
143 })
144 .initial_data(slot, Vec::new(), mem.bytes.len() as u64);
145 }
146 b.build()
147}
148
149/// Interpreter outcome for `prog`.
150pub fn replay_interp(prog: &Program) -> RawRun {
151 run_interpreter_raw(&BuiltCaps::for_image(&image_for(prog), 0))
152}
153
154/// Recompiler outcome for `prog`.
155pub fn replay_recomp(prog: &Program) -> RawRun {
156 run_recompiler_raw(&BuiltCaps::for_image(&image_for(prog), 0))
157}
158
/// Both engines' outcomes for one program.
///
/// Produced by `diff` and `diff_image`; compare the two runs with
/// `Diff::diverges` and render a triage line with `Diff::describe`.
#[derive(Debug, Clone, Copy)]
pub struct Diff {
    /// Raw outcome of the interpreter run (`run_interpreter_raw`).
    pub interp: RawRun,
    /// Raw outcome of the recompiler run (`run_recompiler_raw`).
    pub recomp: RawRun,
}
165
166impl Diff {
167 /// True iff the engines disagree on the exit reason, the full scratchpad
168 /// register signature, `x10`, or gas — any of which is a consensus
169 /// divergence. The signature comparison is the lossless upgrade over the old
170 /// x10 fold: a divergence in *any* captured register is caught, even one
171 /// that a fold would have cancelled.
172 pub fn diverges(&self) -> bool {
173 self.interp.exit_reason != self.recomp.exit_reason
174 || self.interp.return_value != self.recomp.return_value
175 || self.interp.gas_used != self.recomp.gas_used
176 || self.interp.scratchpad_head != self.recomp.scratchpad_head
177 }
178
179 /// One-line human description of the disagreement (for triage logs). Names
180 /// the first divergent signature slot (→ register) when the registers
181 /// disagree, else reports the exit/gas mismatch.
182 pub fn describe(&self) -> String {
183 let slot = self.first_divergent_slot();
184 let sig = match slot {
185 Some(s) => format!(
186 " sig@slot{s}(x{}): {:#018x} vs {:#018x}",
187 crate::oracle::slot_to_xreg(s as u8),
188 self.sig_reg(&self.interp, s),
189 self.sig_reg(&self.recomp, s),
190 ),
191 None => String::new(),
192 };
193 format!(
194 "interp{{exit={} x10={:#018x} gas={}}} vs recomp{{exit={} x10={:#018x} gas={}}}{sig}",
195 self.interp.exit_reason,
196 self.interp.return_value,
197 self.interp.gas_used,
198 self.recomp.exit_reason,
199 self.recomp.return_value,
200 self.recomp.gas_used,
201 )
202 }
203
204 /// Index of the first signature slot (0..`SIG_REGS`) whose register bytes
205 /// differ between the engines, if any.
206 fn first_divergent_slot(&self) -> Option<usize> {
207 (0..encode::SIG_REGS).find(|&s| {
208 self.interp.scratchpad_head[s * 8..s * 8 + 8]
209 != self.recomp.scratchpad_head[s * 8..s * 8 + 8]
210 })
211 }
212
213 /// The LE `u64` at signature slot `s` of `run`'s scratchpad head.
214 fn sig_reg(&self, run: &RawRun, s: usize) -> u64 {
215 u64::from_le_bytes(run.scratchpad_head[s * 8..s * 8 + 8].try_into().unwrap())
216 }
217}
218
219/// Run `prog` through both engines (sharing one `BuiltCaps`) and compare.
220pub fn diff(prog: &Program) -> Diff {
221 let built = BuiltCaps::for_image(&image_for(prog), 0);
222 // Interpreter first (never aborts the host); then the recompiler (which
223 // self-heals its sandbox on a guest abort).
224 let interp = run_interpreter_raw(&built);
225 let recomp = run_recompiler_raw(&built);
226 Diff { interp, recomp }
227}
228
229/// Run a batch through [`diff`], returning `(index, Diff)` for each diverging
230/// program. One long-lived sandbox handles the whole batch — no rebuilds (those
231/// were the source of host-heap corruption; a single sandbox runs thousands of
232/// distinct programs cleanly).
233pub fn diff_batch(progs: &[Program]) -> Vec<(usize, Diff)> {
234 let mut diverged = Vec::new();
235 for (i, prog) in progs.iter().enumerate() {
236 let d = diff(prog);
237 if d.diverges() {
238 diverged.push((i, d));
239 }
240 }
241 diverged
242}
243
244/// Convenience: seed register `xreg` (by x-number) to `val` in a slot-keyed
245/// init map. Mirrors the generator's seeding; handy for hand-built programs.
246pub fn seed_reg(init: &mut BTreeMap<u8, u64>, xreg: u8, val: u64) {
247 let slot = javm_exec::regs::reg_slot_or_ff(xreg);
248 if slot != 0xFF {
249 init.insert(slot, val);
250 }
251}