javm_fuzz/lib.rs
1//! `javm-fuzz` — differential fuzzer for the JAVM PVM2 ISA.
2//!
3//! PVM2 is RV64E + standard extensions (M, C, Zba, Zbb, Zbs, Zicond) + the
4//! custom Xjar/EEI. We have strong confidence the interpreter and the x86
5//! recompiler agree on *legitimate* programs (the conformance suite), but no
6//! systematic coverage of value-domain **edge cases** — INT_MIN/-1 division,
7//! shift-amount masking, W-op sign-extension, `mulhsu`, Zbb corner inputs.
8//! Those are exactly where a future ARM JIT lowering could silently diverge.
9//!
10//! This crate **generates** RV64E-subset programs ([`generate`]), runs each through
11//! the interpreter and the recompiler ([`replay`], linux/x86_64 only), and —
12//! offline — through a Sail/Spike oracle to mint static golden vectors. CI
13//! replays committed vectors and compares to the baked-in gold; the oracle
14//! never enters the build graph.
15//!
16//! ## State readout: scratchpad signature region
17//!
18//! A generated program ends with a deterministic **signature epilogue**
19//! ([`encode::signature_epilogue`]) that `sd`s its full final register file
20//! into a memory region mapped from the scratchpad (`slot[0]`) DataCap at
21//! [`SIG_BASE`]. Each engine surfaces that region's effective bytes back to the
22//! host (`InvocationResult::scratchpad_head`), so the differential compares the
23//! **complete, uncompressed** register signature — not the old lossy x10 fold —
24//! plus exit and gas. This exercises the v3 scratchpad + DataCap CoW return path
25//! end to end (kernel maps `slot[0]`, guest writes it, host reads it back).
26
27use serde::{Deserialize, Serialize};
28use std::collections::BTreeMap;
29
30pub mod encode;
31pub mod generate;
32pub mod oracle;
33pub mod shrink;
34
// The dual-engine replay needs the Hyperlight recompiler host stack, gated to
// linux/x86_64 (via `javm-bench`). The generator, the encoders, and the vector
// types defined in this file are all portable.
38#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
39pub mod replay;
40
/// Version stamp for the golden-signature format.
///
/// Increment this whenever [`encode::signature_epilogue`] or the encoders
/// change such that an unchanged program would yield a different golden
/// signature. Every committed vector batch records the version it was minted
/// against, and the replay test rejects a batch whose version does not match.
pub const SIG_VERSION: u32 = 2;
45
46/// Guest VA the scratchpad (`slot[0]`) signature region maps at — the base of the
47/// instance data extent (`javm_cap::layout::DATA_BASE`). The signature epilogue
48/// stores the register file here; both engines surface its effective bytes.
49pub const SIG_BASE: u32 = javm_cap::layout::DATA_BASE;
50
51pub use encode::SIG_BYTES;
52
/// Frozen ISA string that PVM2's compute core conforms to. For the oracle the
/// RV64E core is run as its RV64I superset, with x16–x31 never named.
pub const ISA: &str = "rv64imc_zba_zbb_zbs_zicond";
56
57/// A generated test program: instruction words (body + signature epilogue, **no
58/// terminator**), the initial register seed, and an optional initial RW memory
59/// window. The replay harness appends the `ecalli 0` terminator.
60#[derive(Debug, Clone, Default, PartialEq, Eq)]
61pub struct Program {
62 /// Instruction words, body followed by the signature epilogue. No
63 /// terminator.
64 pub code: Vec<u32>,
65 /// Initial registers **by slot index 0..=12** (slot 0 = x1, 1 = x2,
66 /// s ≥ 2 = x(s+3); so x10 = slot 7). Matches `EndpointDef.initial_regs`
67 /// keying. x3/x4 (slots 13/14) are not seedable and start at 0 — the
68 /// generator never names them.
69 pub init_regs: BTreeMap<u8, u64>,
70 /// Optional initial RW data window (the generator confines all loads/stores
71 /// here, in-bounds and aligned, so every program is total on the oracle).
72 pub init_mem: Option<MemWindow>,
73}
74
/// One contiguous read-write memory window that backs a program's loads and
/// stores.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MemWindow {
    /// Guest virtual address where the window begins (must be ≥ `DATA_BASE`).
    pub start: u32,
    /// Initial contents of the window; `bytes.len()` is the window size.
    pub bytes: Vec<u8>,
}
83
84impl Program {
85 /// The little-endian byte encoding of `code` (body + fold, no terminator).
86 pub fn code_bytes(&self) -> Vec<u8> {
87 encode::enc(&self.code)
88 }
89}
90
91// ============================================================================
92// Committed golden-vector schema (serde / JSON)
93// ============================================================================
94
95/// One committed vector file: provenance + a batch of vectors.
96#[derive(Serialize, Deserialize, Debug, Clone)]
97pub struct VectorFile {
98 pub meta: VectorMeta,
99 pub vectors: Vec<Vector>,
100}
101
102impl VectorFile {
103 /// Parse a committed vector file from JSON.
104 pub fn from_json(s: &str) -> serde_json::Result<Self> {
105 serde_json::from_str(s)
106 }
107}
108
109/// Provenance for a vector batch — enough to reproduce and to detect staleness.
110#[derive(Serialize, Deserialize, Debug, Clone)]
111pub struct VectorMeta {
112 /// git SHA of the generator at mint time.
113 pub gen_sha: String,
114 /// PRNG seed that produced this batch.
115 pub seed: u64,
116 /// Which oracle minted the golds, e.g. `"spike-1.1.1-dev"` or
117 /// `"interp-provisional"` (the interpreter as a stand-in before the
118 /// external oracle is wired).
119 pub oracle: String,
120 /// Frozen ISA string ([`ISA`]).
121 pub isa: String,
122 /// [`SIG_VERSION`] these golds were minted against.
123 pub sig_version: u32,
124}
125
126/// One golden vector: program + initial state + the oracle's projected
127/// post-state (register signature + exit).
128#[derive(Serialize, Deserialize, Debug, Clone)]
129pub struct Vector {
130 /// Stable, human-readable id, e.g. `"div_signed/intmin_div_neg1"`.
131 pub id: String,
132 #[serde(default)]
133 pub init: Init,
134 /// Hex of the program body + signature-epilogue bytes (no terminator).
135 pub code_hex: String,
136 pub gold: Gold,
137}
138
139/// Initial state seed for a vector.
140#[derive(Serialize, Deserialize, Debug, Clone, Default)]
141pub struct Init {
142 /// Initial registers by slot index 0..=12 (see [`Program::init_regs`]).
143 #[serde(default)]
144 pub regs: BTreeMap<u8, u64>,
145 /// Optional initial RW data window.
146 #[serde(default)]
147 pub mem: Option<MemInit>,
148}
149
150/// Serialized form of [`MemWindow`].
151#[derive(Serialize, Deserialize, Debug, Clone)]
152pub struct MemInit {
153 pub start: u32,
154 /// Hex of the initial window bytes; window size is the decoded length.
155 pub bytes_hex: String,
156}
157
158/// The oracle-computed expected post-state projection.
159#[derive(Serialize, Deserialize, Debug, Clone)]
160pub struct Gold {
161 /// Golden register signature — hex of the `SIG_REGS` post-body registers
162 /// (one LE `u64` per captured slot; [`SIG_BYTES`] bytes). This is the
163 /// effective bytes the engines' scratchpad region holds after the signature
164 /// epilogue stores the register file.
165 pub signature_hex: String,
166 /// Golden exit reason (4 = HostCall(0) for every total program).
167 pub exit: u32,
168 #[serde(default)]
169 pub exit_arg: u32,
170}
171
172impl Gold {
173 /// Decode `signature_hex` into the `SIG_BYTES`-byte signature.
174 pub fn signature(&self) -> Vec<u8> {
175 hex::decode(self.signature_hex.trim_start_matches("0x"))
176 .expect("gold signature_hex is valid hex")
177 }
178}
179
180impl Vector {
181 /// Decode this vector's program (body + fold words) and initial state.
182 pub fn to_program(&self) -> Program {
183 let bytes = hex::decode(self.code_hex.trim_start_matches("0x"))
184 .expect("vector code_hex is valid hex");
185 let code: Vec<u32> = bytes
186 .chunks_exact(4)
187 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
188 .collect();
189 let init_mem = self.init.mem.as_ref().map(|m| MemWindow {
190 start: m.start,
191 bytes: hex::decode(m.bytes_hex.trim_start_matches("0x"))
192 .expect("vector mem bytes_hex is valid hex"),
193 });
194 Program {
195 code,
196 init_regs: self.init.regs.clone(),
197 init_mem,
198 }
199 }
200
201 /// Build a vector from a program + the oracle's golden projection.
202 pub fn from_program(id: impl Into<String>, prog: &Program, gold: Gold) -> Self {
203 let mem = prog.init_mem.as_ref().map(|m| MemInit {
204 start: m.start,
205 bytes_hex: hex::encode(&m.bytes),
206 });
207 Vector {
208 id: id.into(),
209 init: Init {
210 regs: prog.init_regs.clone(),
211 mem,
212 },
213 code_hex: hex::encode(prog.code_bytes()),
214 gold,
215 }
216 }
217}