Skip to main content

javm_fuzz/
lib.rs

1//! `javm-fuzz` — differential fuzzer for the JAVM PVM2 ISA.
2//!
3//! PVM2 is RV64E + standard extensions (M, C, Zba, Zbb, Zbs, Zicond) + the
4//! custom Xjar/EEI. We have strong confidence the interpreter and the x86
5//! recompiler agree on *legitimate* programs (the conformance suite), but no
6//! systematic coverage of value-domain **edge cases** — INT_MIN/-1 division,
7//! shift-amount masking, W-op sign-extension, `mulhsu`, Zbb corner inputs.
8//! Those are exactly where a future ARM JIT lowering could silently diverge.
9//!
10//! This crate **generates** RV64E-subset programs ([`generate`]), runs each through
11//! the interpreter and the recompiler ([`replay`], linux/x86_64 only), and —
12//! offline — through a Sail/Spike oracle to mint static golden vectors. CI
13//! replays committed vectors and compares to the baked-in gold; the oracle
14//! never enters the build graph.
15//!
16//! ## State readout: scratchpad signature region
17//!
18//! A generated program ends with a deterministic **signature epilogue**
19//! ([`encode::signature_epilogue`]) that `sd`s its full final register file
20//! into a memory region mapped from the scratchpad (`slot[0]`) DataCap at
21//! [`SIG_BASE`]. Each engine surfaces that region's effective bytes back to the
22//! host (`InvocationResult::scratchpad_head`), so the differential compares the
23//! **complete, uncompressed** register signature — not the old lossy x10 fold —
24//! plus exit and gas. This exercises the v3 scratchpad + DataCap CoW return path
25//! end to end (kernel maps `slot[0]`, guest writes it, host reads it back).
26
27use serde::{Deserialize, Serialize};
28use std::collections::BTreeMap;
29
30pub mod encode;
31pub mod generate;
32pub mod oracle;
33pub mod shrink;
34
35// The dual-engine replay needs the Hyperlight recompiler host stack, gated to
36// linux/x86_64 (via `javm-bench`). The generator, encoders, and vector types
37// above are all portable.
38#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
39pub mod replay;
40
/// Version of the signature scheme that golden vectors are minted against.
///
/// Bump when [`encode::signature_epilogue`] or the encoders change in a way that
/// alters the golden signature of an unchanged program. Committed vectors record
/// the version they were minted against; the replay test refuses a mismatch.
pub const SIG_VERSION: u32 = 2;

/// Guest VA the scratchpad (`slot[0]`) signature region maps at — the base of the
/// instance data extent (`javm_cap::layout::DATA_BASE`). The signature epilogue
/// stores the register file here; both engines surface its effective bytes.
pub const SIG_BASE: u32 = javm_cap::layout::DATA_BASE;

// Re-exported at the crate root: the golden-signature docs on `Gold` are
// written in terms of `SIG_BYTES`.
pub use encode::SIG_BYTES;

/// The frozen ISA string PVM2's compute core conforms to (RV64E run as the
/// RV64I superset for the oracle, never naming x16–x31).
///
/// Committed vector files carry an ISA string in `VectorMeta::isa`.
pub const ISA: &str = "rv64imc_zba_zbb_zbs_zicond";
56
/// A generated test program: instruction words (body + signature epilogue, **no
/// terminator**), the initial register seed, and an optional initial RW memory
/// window. The replay harness appends the `ecalli 0` terminator.
///
/// The derived `Default` is the empty program: no code, no seeded registers,
/// and no memory window.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Program {
    /// Instruction words, body followed by the signature epilogue. No
    /// terminator.
    pub code: Vec<u32>,
    /// Initial registers **by slot index 0..=12** (slot 0 = x1, 1 = x2,
    /// s ≥ 2 = x(s+3); so x10 = slot 7 and x15 = slot 12). Matches
    /// `EndpointDef.initial_regs` keying. x3/x4 (slots 13/14) are not seedable
    /// and start at 0 — the generator never names them.
    pub init_regs: BTreeMap<u8, u64>,
    /// Optional initial RW data window (the generator confines all loads/stores
    /// here, in-bounds and aligned, so every program is total on the oracle).
    /// `None` means the program is given no initial data window.
    pub init_mem: Option<MemWindow>,
}
74
/// A contiguous RW memory window backing the program's loads/stores.
///
/// This is the in-memory form; committed vectors store it as `MemInit`, with
/// the bytes hex-encoded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemWindow {
    /// Guest virtual address of the window start (must be ≥ `DATA_BASE`).
    pub start: u32,
    /// Initial bytes; the window size is `bytes.len()`.
    pub bytes: Vec<u8>,
}
83
84impl Program {
85    /// The little-endian byte encoding of `code` (body + fold, no terminator).
86    pub fn code_bytes(&self) -> Vec<u8> {
87        encode::enc(&self.code)
88    }
89}
90
91// ============================================================================
92// Committed golden-vector schema (serde / JSON)
93// ============================================================================
94
/// One committed vector file: provenance + a batch of vectors.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct VectorFile {
    /// Provenance of this batch (generator SHA, seed, oracle, ISA, signature
    /// version).
    pub meta: VectorMeta,
    /// The golden vectors in this batch.
    pub vectors: Vec<Vector>,
}
101
102impl VectorFile {
103    /// Parse a committed vector file from JSON.
104    pub fn from_json(s: &str) -> serde_json::Result<Self> {
105        serde_json::from_str(s)
106    }
107}
108
/// Provenance for a vector batch — enough to reproduce and to detect staleness.
///
/// Every field is required in the JSON (none carries `#[serde(default)]`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct VectorMeta {
    /// git SHA of the generator at mint time.
    pub gen_sha: String,
    /// PRNG seed that produced this batch.
    pub seed: u64,
    /// Which oracle minted the golds, e.g. `"spike-1.1.1-dev"` or
    /// `"interp-provisional"` (the interpreter as a stand-in before the
    /// external oracle is wired).
    pub oracle: String,
    /// Frozen ISA string ([`ISA`]).
    pub isa: String,
    /// [`SIG_VERSION`] these golds were minted against.
    pub sig_version: u32,
}
125
/// One golden vector: program + initial state + the oracle's projected
/// post-state (register signature + exit).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Vector {
    /// Stable, human-readable id, e.g. `"div_signed/intmin_div_neg1"`.
    pub id: String,
    /// Initial register/memory seed. Optional in the JSON: when the key is
    /// absent it deserializes to `Init::default()` (no registers, no memory).
    #[serde(default)]
    pub init: Init,
    /// Hex of the program body + signature-epilogue bytes (no terminator).
    pub code_hex: String,
    /// The oracle-computed expected post-state for this program.
    pub gold: Gold,
}
138
/// Initial state seed for a vector.
///
/// Both fields are optional in the JSON; the derived `Default` is an empty
/// register map and no memory window.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct Init {
    /// Initial registers by slot index 0..=12 (see [`Program::init_regs`]).
    /// Deserializes to an empty map when absent.
    #[serde(default)]
    pub regs: BTreeMap<u8, u64>,
    /// Optional initial RW data window. Deserializes to `None` when absent.
    #[serde(default)]
    pub mem: Option<MemInit>,
}
149
/// Serialized form of [`MemWindow`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct MemInit {
    /// Start of the window; copied verbatim into [`MemWindow::start`] when the
    /// vector is decoded.
    pub start: u32,
    /// Hex of the initial window bytes; window size is the decoded length.
    pub bytes_hex: String,
}
157
/// The oracle-computed expected post-state projection.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Gold {
    /// Golden register signature — hex of the `SIG_REGS` post-body registers
    /// (one LE `u64` per captured slot; [`SIG_BYTES`] bytes). This is the
    /// effective bytes the engines' scratchpad region holds after the signature
    /// epilogue stores the register file.
    pub signature_hex: String,
    /// Golden exit reason (4 = HostCall(0) for every total program).
    pub exit: u32,
    /// Argument accompanying `exit`; deserializes to 0 when absent from the
    /// JSON.
    /// NOTE(review): presumably the host-call index (the `0` in `HostCall(0)`)
    /// — confirm against the replay harness.
    #[serde(default)]
    pub exit_arg: u32,
}
171
172impl Gold {
173    /// Decode `signature_hex` into the `SIG_BYTES`-byte signature.
174    pub fn signature(&self) -> Vec<u8> {
175        hex::decode(self.signature_hex.trim_start_matches("0x"))
176            .expect("gold signature_hex is valid hex")
177    }
178}
179
180impl Vector {
181    /// Decode this vector's program (body + fold words) and initial state.
182    pub fn to_program(&self) -> Program {
183        let bytes = hex::decode(self.code_hex.trim_start_matches("0x"))
184            .expect("vector code_hex is valid hex");
185        let code: Vec<u32> = bytes
186            .chunks_exact(4)
187            .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
188            .collect();
189        let init_mem = self.init.mem.as_ref().map(|m| MemWindow {
190            start: m.start,
191            bytes: hex::decode(m.bytes_hex.trim_start_matches("0x"))
192                .expect("vector mem bytes_hex is valid hex"),
193        });
194        Program {
195            code,
196            init_regs: self.init.regs.clone(),
197            init_mem,
198        }
199    }
200
201    /// Build a vector from a program + the oracle's golden projection.
202    pub fn from_program(id: impl Into<String>, prog: &Program, gold: Gold) -> Self {
203        let mem = prog.init_mem.as_ref().map(|m| MemInit {
204            start: m.start,
205            bytes_hex: hex::encode(&m.bytes),
206        });
207        Vector {
208            id: id.into(),
209            init: Init {
210                regs: prog.init_regs.clone(),
211                mem,
212            },
213            code_hex: hex::encode(prog.code_bytes()),
214            gold,
215        }
216    }
217}