Skip to main content

javm_fuzz/
replay.rs

1//! Dual-engine replay: run a [`Program`] through the interpreter and the x86
2//! recompiler and compare. Gated to linux/x86_64 (the recompiler needs the
3//! Hyperlight host stack, so the whole `javm-bench` crate is gated to it).
4//!
5//! Both engines are driven through the *same* `javm-bench` `BuiltCaps` →
6//! `invoke_cached` path, so they receive byte-identical caps/initial-state —
7//! any divergence is a real engine disagreement, never a setup skew.
8
9use crate::{Program, SIG_BASE, encode};
10use javm_bench::{BuiltCaps, RawRun, run_interpreter_raw, run_recompiler_raw};
11use javm_cap::abi::SCRATCHPAD_SLOT;
12use javm_cap::image::{EndpointDef, Image, ImageBuilder, MemoryMapping};
13use javm_cap::slot::{Key, SlotPath};
14use std::collections::BTreeMap;
15
/// Cnode slot holding the data cap that backs the fuzz memory window.
const WINDOW_SLOT: u32 = 1;

/// Size in bytes of the scratchpad (`slot[0]`) signature region: one 4 KiB
/// page. The program's signature epilogue stores the `SIG_BYTES`-byte register
/// file into the head of this region.
const SIG_REGION_BYTES: u64 = 4 * 1024;
22
23/// Map the scratchpad (`slot[0]`) signature region at [`SIG_BASE`] into the
24/// builder: an empty-content initial data slot of one page. The guest's
25/// signature epilogue CoWs this region during the run; both engines surface its
26/// effective bytes as the run's register signature.
27fn add_signature_region(b: ImageBuilder) -> ImageBuilder {
28    let slot = Key::from(SCRATCHPAD_SLOT);
29    b.mapping(MemoryMapping {
30        start: SIG_BASE as u64,
31        size: SIG_REGION_BYTES,
32        source: SlotPath::root(slot.clone()),
33    })
34    .initial_data(slot, Vec::new(), SIG_REGION_BYTES)
35}
36
37/// Build an `Image` from raw instruction `words` (+ `ecalli 0` terminator)
38/// with a **pinned read-only** data cap of `ro_bytes` mapped at `ro_start`
39/// — for category-#3 read-only-cluster differential tests. Both engines
40/// materialize it `PinnedCapRo` (interp perm-RO, recompiler MatRange) and
41/// charge it per 2 MiB cluster.
42pub fn image_with_ro(words: &[u32], ro_start: u32, ro_bytes: &[u8]) -> Image {
43    let mut code = encode::enc(words);
44    code.extend_from_slice(&encode::enc(&[encode::HALT]));
45    let slot = Key::from((WINDOW_SLOT + 1) as u8);
46    ImageBuilder::new()
47        .code(code)
48        .endpoint(
49            Key::from(0u8),
50            EndpointDef {
51                entry_pc: 0,
52                arg_registers: 0,
53                arg_cnode_size: 0,
54                initial_regs: BTreeMap::new(),
55            },
56        )
57        .mapping(MemoryMapping {
58            start: ro_start as u64,
59            size: ro_bytes.len() as u64,
60            source: SlotPath::root(slot.clone()),
61        })
62        .pinned_data(slot, ro_bytes.to_vec(), ro_bytes.len() as u64)
63        .build()
64}
65
66/// Build an `Image` from raw `words` with **several** pinned read-only data
67/// caps, each `(start, bytes)` — for multi-cap read-only-cluster differential
68/// tests (e.g. two distinct caps sharing one 2 MiB cluster). Each cap takes its
69/// own cnode slot, so the recompiler resolves each as a separate `PinnedCapRo`
70/// `MatRange` with its own source PA, exactly as production does.
71pub fn image_with_ro_caps(words: &[u32], caps: &[(u32, &[u8])]) -> Image {
72    let mut code = encode::enc(words);
73    code.extend_from_slice(&encode::enc(&[encode::HALT]));
74    let mut b = ImageBuilder::new().code(code).endpoint(
75        Key::from(0u8),
76        EndpointDef {
77            entry_pc: 0,
78            arg_registers: 0,
79            arg_cnode_size: 0,
80            initial_regs: BTreeMap::new(),
81        },
82    );
83    for (i, (start, bytes)) in caps.iter().enumerate() {
84        let slot = Key::from((WINDOW_SLOT + 1 + i as u32) as u8);
85        b = b
86            .mapping(MemoryMapping {
87                start: *start as u64,
88                size: bytes.len() as u64,
89                source: SlotPath::root(slot.clone()),
90            })
91            .pinned_data(slot, bytes.to_vec(), bytes.len() as u64);
92    }
93    b.build()
94}
95
96/// Run a pre-built `Image` through both engines and compare.
97pub fn diff_image(img: &Image) -> Diff {
98    let built = BuiltCaps::for_image(img, 0);
99    let interp = run_interpreter_raw(&built);
100    let recomp = run_recompiler_raw(&built);
101    Diff { interp, recomp }
102}
103
104/// Build the `Image` for a program: its code (body + signature epilogue) plus
105/// the appended `ecalli 0` terminator, entered at pc 0 with the program's
106/// initial register seed.
107///
108/// The Image always maps the scratchpad (`slot[0]`) signature region at
109/// [`SIG_BASE`] (via `add_signature_region`); the program's signature epilogue
110/// stores its register file there, and both engines surface the region's
111/// effective bytes for the lossless differential.
112///
113/// When the program declares an `init_mem` window, the Image declares a
114/// matching RW data mapping so **both** engines size their data extent to
115/// cover it and lazily materialize (category #3) the same pages. The window is
116/// backed by an *empty* initial slot (zero-filled, page-aligned `mem_buf`), so
117/// both engines treat it as ephemeral — the lazy-materialization charge is
118/// identical regardless, and this keeps the differential off the cap-PA
119/// page-in path (whose alignment is a separate concern).
120pub fn image_for(prog: &Program) -> Image {
121    let mut code = prog.code_bytes();
122    code.extend_from_slice(&encode::enc(&[encode::HALT]));
123
124    let mut b = ImageBuilder::new().code(code).endpoint(
125        Key::from(0u8),
126        EndpointDef {
127            entry_pc: 0,
128            arg_registers: 0,
129            arg_cnode_size: 0,
130            initial_regs: prog.init_regs.clone(),
131        },
132    );
133    b = add_signature_region(b);
134    if let Some(mem) = &prog.init_mem {
135        let slot = Key::from((WINDOW_SLOT) as u8);
136        // Empty content → no overlay; the mapping only sizes the data extent,
137        // and the window materializes as ephemeral zero pages on both engines.
138        b = b
139            .mapping(MemoryMapping {
140                start: mem.start as u64,
141                size: mem.bytes.len() as u64,
142                source: SlotPath::root(slot.clone()),
143            })
144            .initial_data(slot, Vec::new(), mem.bytes.len() as u64);
145    }
146    b.build()
147}
148
149/// Interpreter outcome for `prog`.
150pub fn replay_interp(prog: &Program) -> RawRun {
151    run_interpreter_raw(&BuiltCaps::for_image(&image_for(prog), 0))
152}
153
154/// Recompiler outcome for `prog`.
155pub fn replay_recomp(prog: &Program) -> RawRun {
156    run_recompiler_raw(&BuiltCaps::for_image(&image_for(prog), 0))
157}
158
159/// Both engines' outcomes for one program.
160#[derive(Debug, Clone, Copy)]
161pub struct Diff {
162    pub interp: RawRun,
163    pub recomp: RawRun,
164}
165
166impl Diff {
167    /// True iff the engines disagree on the exit reason, the full scratchpad
168    /// register signature, `x10`, or gas — any of which is a consensus
169    /// divergence. The signature comparison is the lossless upgrade over the old
170    /// x10 fold: a divergence in *any* captured register is caught, even one
171    /// that a fold would have cancelled.
172    pub fn diverges(&self) -> bool {
173        self.interp.exit_reason != self.recomp.exit_reason
174            || self.interp.return_value != self.recomp.return_value
175            || self.interp.gas_used != self.recomp.gas_used
176            || self.interp.scratchpad_head != self.recomp.scratchpad_head
177    }
178
179    /// One-line human description of the disagreement (for triage logs). Names
180    /// the first divergent signature slot (→ register) when the registers
181    /// disagree, else reports the exit/gas mismatch.
182    pub fn describe(&self) -> String {
183        let slot = self.first_divergent_slot();
184        let sig = match slot {
185            Some(s) => format!(
186                " sig@slot{s}(x{}): {:#018x} vs {:#018x}",
187                crate::oracle::slot_to_xreg(s as u8),
188                self.sig_reg(&self.interp, s),
189                self.sig_reg(&self.recomp, s),
190            ),
191            None => String::new(),
192        };
193        format!(
194            "interp{{exit={} x10={:#018x} gas={}}} vs recomp{{exit={} x10={:#018x} gas={}}}{sig}",
195            self.interp.exit_reason,
196            self.interp.return_value,
197            self.interp.gas_used,
198            self.recomp.exit_reason,
199            self.recomp.return_value,
200            self.recomp.gas_used,
201        )
202    }
203
204    /// Index of the first signature slot (0..`SIG_REGS`) whose register bytes
205    /// differ between the engines, if any.
206    fn first_divergent_slot(&self) -> Option<usize> {
207        (0..encode::SIG_REGS).find(|&s| {
208            self.interp.scratchpad_head[s * 8..s * 8 + 8]
209                != self.recomp.scratchpad_head[s * 8..s * 8 + 8]
210        })
211    }
212
213    /// The LE `u64` at signature slot `s` of `run`'s scratchpad head.
214    fn sig_reg(&self, run: &RawRun, s: usize) -> u64 {
215        u64::from_le_bytes(run.scratchpad_head[s * 8..s * 8 + 8].try_into().unwrap())
216    }
217}
218
219/// Run `prog` through both engines (sharing one `BuiltCaps`) and compare.
220pub fn diff(prog: &Program) -> Diff {
221    let built = BuiltCaps::for_image(&image_for(prog), 0);
222    // Interpreter first (never aborts the host); then the recompiler (which
223    // self-heals its sandbox on a guest abort).
224    let interp = run_interpreter_raw(&built);
225    let recomp = run_recompiler_raw(&built);
226    Diff { interp, recomp }
227}
228
229/// Run a batch through [`diff`], returning `(index, Diff)` for each diverging
230/// program. One long-lived sandbox handles the whole batch — no rebuilds (those
231/// were the source of host-heap corruption; a single sandbox runs thousands of
232/// distinct programs cleanly).
233pub fn diff_batch(progs: &[Program]) -> Vec<(usize, Diff)> {
234    let mut diverged = Vec::new();
235    for (i, prog) in progs.iter().enumerate() {
236        let d = diff(prog);
237        if d.diverges() {
238            diverged.push((i, d));
239        }
240    }
241    diverged
242}
243
244/// Convenience: seed register `xreg` (by x-number) to `val` in a slot-keyed
245/// init map. Mirrors the generator's seeding; handy for hand-built programs.
246pub fn seed_reg(init: &mut BTreeMap<u8, u64>, xreg: u8, val: u64) {
247    let slot = javm_exec::regs::reg_slot_or_ff(xreg);
248    if slot != 0xFF {
249        init.insert(slot, val);
250    }
251}