Skip to main content

javm_fuzz/
generate.rs

1//! RV64E-subset program generator.
2//!
3//! Two modes, sharing one boundary-biased operand pool:
4//! - [`enumerate_boundary`] — deterministic, near-exhaustive over each op ×
5//!   boundary-operand combination. This is what *guarantees* the high-value
6//!   corners appear (e.g. `div x?, INT_MIN, -1`).
7//! - [`Gen::random_program`] — random instruction sequences for cross-op /
8//!   cross-state breadth, seeded for reproducibility.
9//!
10//! Every generated program is **total** by construction: register-only,
11//! straight-line, over x0–x15 minus the spilled (x3/x4) and x6/x7 (left at 0 so
12//! their signature slots are deterministic), ending in
13//! [`encode::signature_epilogue`]. So it always halts cleanly on a conformant
14//! engine — and on the oracle — with a defined register signature. (The div
15//! corners are *included*: RISC-V defines INT_MIN/-1 as a value, so the oracle
16//! and interpreter produce one; only a buggy recompiler diverges.)
17//!
18//! v1 emits register-only ops (no loads/stores/branches — those want a memory
19//! window / control flow and come later); this already covers the
20//! value-domain edge cases that matter most (div overflow, shift masking, W-op
21//! sign-extension, `mulhsu`, Zbb corners).
22
23use crate::Program;
24use crate::encode::{self, Fmt, OpSpec};
25use javm_exec::regs::reg_slot_or_ff;
26use std::collections::BTreeMap;
27
28/// Boundary-biased operand pool — the values bugs hide at.
29pub const OPERANDS: &[u64] = &[
30    0x0000_0000_0000_0000, // 0
31    0x0000_0000_0000_0001, // 1
32    0x0000_0000_0000_0002, // 2
33    0xFFFF_FFFF_FFFF_FFFF, // -1
34    0x8000_0000_0000_0000, // i64::MIN
35    0x7FFF_FFFF_FFFF_FFFF, // i64::MAX
36    0x0000_0000_7FFF_FFFF, // i32::MAX
37    0x0000_0000_8000_0000, // i32::MIN, zero-extended
38    0xFFFF_FFFF_8000_0000, // i32::MIN, sign-extended
39    0x0000_0000_FFFF_FFFF, // u32::MAX
40    0x0000_0000_0000_0010, // 16
41    0x0000_0000_0000_0040, // 64
42    0xDEAD_BEEF_CAFE_BABE, // arbitrary
43];
44
45/// Boundary 12-bit-signed immediates (for I-type ALU).
46pub const IMMS: &[i32] = &[0, 1, -1, 2, -2, 2047, -2048, 0x555, -0x555];
47
48/// Boundary shift amounts — includes the out-of-range 64/65 that exercise the
49/// `& 63` / `& 31` masking both engines (must) perform.
50pub const SHAMTS: &[i32] = &[0, 1, 7, 8, 31, 32, 63, 64, 65];
51
52/// Boundary 20-bit upper immediates (for `lui`/`auipc`).
53pub const IMM20S: &[i32] = &[0, 1, 0xF_FFFF, 0x8_0000, 0x7_FFFF, 0xA_5A5A];
54
55/// Writable destination registers — x1, x2, x5, x8–x15. Excludes x0 (zero),
56/// x3/x4 (spilled, never named), x6/x7 (kept at 0 for deterministic signature
57/// slots), x16–31 (reserved).
58const DEST: &[u8] = &[1, 2, 5, 8, 9, 10, 11, 12, 13, 14, 15];
59/// Source registers — `DEST` plus x0.
60const SRC: &[u8] = &[0, 1, 2, 5, 8, 9, 10, 11, 12, 13, 14, 15];
61
62/// Seed register `xreg` to `val` in a slot-keyed init map (no-op for x0 /
63/// reserved, which have no slot).
64fn seed(init: &mut BTreeMap<u8, u64>, xreg: u8, val: u64) {
65    let slot = reg_slot_or_ff(xreg);
66    if slot != 0xFF {
67        init.insert(slot, val);
68    }
69}
70
71/// Wrap a body in the signature epilogue → a complete program.
72fn finish(body: Vec<u32>, init_regs: BTreeMap<u8, u64>) -> Program {
73    let mut code = body;
74    code.extend(encode::signature_epilogue(crate::SIG_BASE));
75    Program {
76        code,
77        init_regs,
78        init_mem: None,
79    }
80}
81
82/// Register-only ops (skip loads/stores/branches).
83fn reg_only_ops() -> impl Iterator<Item = &'static OpSpec> {
84    encode::OPS
85        .iter()
86        .filter(|s| !s.touches_memory_or_control())
87}
88
89/// Deterministic boundary enumeration. For each register-only op, emit one
90/// program per relevant boundary-operand combination, seeding source registers
91/// x8/x9 and writing the result to x10. Guarantees the corner cases appear.
92pub fn enumerate_boundary() -> Vec<Program> {
93    const RA: u8 = 8;
94    const RB: u8 = 9;
95    const RD: u8 = 10;
96    let mut progs = Vec::new();
97    for spec in reg_only_ops() {
98        match spec.fmt {
99            // Two source operands: full a × b boundary cross-product.
100            Fmt::R => {
101                for &a in OPERANDS {
102                    for &b in OPERANDS {
103                        let mut init = BTreeMap::new();
104                        seed(&mut init, RA, a);
105                        seed(&mut init, RB, b);
106                        let body = vec![encode::encode_op(spec, RD, RA, RB, 0)];
107                        progs.push(finish(body, init));
108                    }
109                }
110            }
111            // One source operand + a boundary immediate.
112            Fmt::I => {
113                for &a in OPERANDS {
114                    for &imm in IMMS {
115                        let mut init = BTreeMap::new();
116                        seed(&mut init, RA, a);
117                        let body = vec![encode::encode_op(spec, RD, RA, 0, imm)];
118                        progs.push(finish(body, init));
119                    }
120                }
121            }
122            // One source operand + a boundary shift amount.
123            Fmt::IShift | Fmt::IShift32 => {
124                for &a in OPERANDS {
125                    for &sh in SHAMTS {
126                        let mut init = BTreeMap::new();
127                        seed(&mut init, RA, a);
128                        let body = vec![encode::encode_op(spec, RD, RA, 0, sh)];
129                        progs.push(finish(body, init));
130                    }
131                }
132            }
133            // One source operand, no immediate.
134            Fmt::Unary => {
135                for &a in OPERANDS {
136                    let mut init = BTreeMap::new();
137                    seed(&mut init, RA, a);
138                    let body = vec![encode::encode_op(spec, RD, RA, 0, 0)];
139                    progs.push(finish(body, init));
140                }
141            }
142            // Upper immediate (no source register).
143            Fmt::U => {
144                for &imm in IMM20S {
145                    let body = vec![encode::encode_op(spec, RD, 0, 0, imm)];
146                    progs.push(finish(body, BTreeMap::new()));
147                }
148            }
149            Fmt::Store | Fmt::Branch => {} // skipped (filtered out above)
150        }
151    }
152    progs
153}
154
155/// Small, fast, deterministic PRNG (xorshift64* — state never zero).
156pub struct XorShift64(u64);
157
158impl XorShift64 {
159    pub fn new(seed: u64) -> Self {
160        XorShift64(seed ^ 0x9E37_79B9_7F4A_7C15 | 1)
161    }
162    pub fn next_u64(&mut self) -> u64 {
163        let mut x = self.0;
164        x ^= x << 13;
165        x ^= x >> 7;
166        x ^= x << 17;
167        self.0 = x;
168        x.wrapping_mul(0x2545_F491_4F6C_DD1D)
169    }
170    fn pick<T: Copy>(&mut self, xs: &[T]) -> T {
171        xs[(self.next_u64() % xs.len() as u64) as usize]
172    }
173}
174
175/// Random program generator.
176pub struct Gen {
177    rng: XorShift64,
178}
179
180impl Gen {
181    pub fn new(seed: u64) -> Self {
182        Gen {
183            rng: XorShift64::new(seed),
184        }
185    }
186
187    /// A random straight-line register-only program of `body_len` instructions,
188    /// with `DEST` registers pre-seeded to random boundary operands.
189    pub fn random_program(&mut self, body_len: usize) -> Program {
190        let mut init = BTreeMap::new();
191        for &xr in DEST {
192            // Seed most destinations with a boundary value (some left at 0).
193            if self.rng.next_u64() & 3 != 0 {
194                seed(&mut init, xr, self.rng.pick(OPERANDS));
195            }
196        }
197        let ops: Vec<&OpSpec> = reg_only_ops().collect();
198        let mut body = Vec::with_capacity(body_len);
199        for _ in 0..body_len {
200            let spec = self.rng.pick(&ops);
201            let rd = self.rng.pick(DEST);
202            let rs1 = self.rng.pick(SRC);
203            let rs2 = self.rng.pick(SRC);
204            let imm = match spec.fmt {
205                Fmt::IShift | Fmt::IShift32 => self.rng.pick(SHAMTS),
206                Fmt::U => self.rng.pick(IMM20S),
207                _ => self.rng.pick(IMMS),
208            };
209            body.push(encode::encode_op(spec, rd, rs1, rs2, imm));
210        }
211        finish(body, init)
212    }
213
214    /// `count` random programs, each `body_len` instructions.
215    pub fn random_batch(&mut self, count: usize, body_len: usize) -> Vec<Program> {
216        (0..count).map(|_| self.random_program(body_len)).collect()
217    }
218}
219
220#[cfg(test)]
221mod tests {
222    use super::*;
223    use javm_exec::instruction::{Inst, decode};
224
225    /// Every generated instruction must decode to a non-`Reserved`,
226    /// non-terminator instruction (terminators come only from the appended
227    /// HALT, which these programs don't include) — i.e. the generator never
228    /// emits x3/x4, x16–31, or a bad encoding.
229    fn assert_all_valid(prog: &Program) {
230        let bytes = prog.code_bytes();
231        let mut off = 0;
232        while off < bytes.len() {
233            let (inst, len) = decode(&bytes[off..]).expect("decodes");
234            let w =
235                u32::from_le_bytes([bytes[off], bytes[off + 1], bytes[off + 2], bytes[off + 3]]);
236            assert!(
237                !matches!(inst, Inst::Reserved { .. }),
238                "generator emitted a reserved encoding {w:#010x} at byte {off}",
239            );
240            off += len as usize;
241        }
242    }
243
244    #[test]
245    fn boundary_enumeration_is_all_valid_and_hits_div() {
246        let progs = enumerate_boundary();
247        assert!(progs.len() > 100, "expected a broad enumeration");
248        for p in &progs {
249            assert_all_valid(p);
250        }
251    }
252
253    #[test]
254    fn random_programs_are_valid_and_deterministic() {
255        let a = Gen::new(42).random_batch(50, 8);
256        let b = Gen::new(42).random_batch(50, 8);
257        assert_eq!(a, b, "same seed must reproduce the same programs");
258        for p in &a {
259            assert_all_valid(p);
260        }
261    }
262}