Skip to main content

javm_exec/
decode.rs

1//! PVM bytecode predecoding.
2//!
3//! Cherry-picked from v2 `javm/src/interpreter/mod.rs` (the predecode
4//! pass — basic-block / gas-block detection, per-block gas costs,
5//! flattened `DecodedInst` array, `pc_to_idx` map). No cap awareness.
6//!
7//! Entry point: [`predecode`] takes a [`PvmProgram`] and returns a
8//! [`Predecoded`] bundle that the interpreter (and recompiler) execute
9//! against. The expensive one-time work (gas-block computation,
10//! per-instruction predecoding, target resolution) is done here so the
11//! hot loop in `interp` is branch-light.
12
13use crate::args::{self, Args};
14use crate::instruction::Opcode;
15use crate::program::PvmProgram;
16use alloc::vec::Vec;
17
18/// Pre-decoded instruction for the fast interpreter / JIT path.
19///
20/// Flattened representation: all operands stored directly (no enum
21/// discrimination at runtime). 40 bytes.
22#[derive(Clone, Copy, Debug)]
23pub struct DecodedInst {
24    pub opcode: Opcode,
25    /// Register A (first register operand, context-dependent).
26    pub ra: u8,
27    /// Register B (second register operand, context-dependent).
28    pub rb: u8,
29    /// Register D (destination register, context-dependent).
30    pub rd: u8,
31    /// First immediate / offset value.
32    pub imm1: u64,
33    /// Second immediate / offset value.
34    pub imm2: u64,
35    /// Byte offset of this instruction in the code.
36    pub pc: u32,
37    /// Byte offset of the next sequential instruction.
38    pub next_pc: u32,
39    /// Pre-resolved instruction index for the next sequential instruction.
40    pub next_idx: u32,
41    /// Pre-resolved instruction index for the branch/jump target.
42    /// `u32::MAX` = invalid (out-of-program target).
43    pub target_idx: u32,
44    /// Gas cost to charge at gas-block entry (0 for non-gas-block-start
45    /// instructions). Gas blocks = {PC=0} ∪ {post-terminator PCs};
46    /// branch targets are NOT gas-block starts.
47    pub bb_gas_cost: u32,
48}
49
50const _: () = assert!(core::mem::size_of::<DecodedInst>() == 40);
51
52/// Bundle of predecoded state shared by interp / JIT.
53#[derive(Clone, Debug)]
54pub struct Predecoded {
55    pub decoded_insts: Vec<DecodedInst>,
56    /// Map from PC byte offset → instruction index. `u32::MAX` = invalid.
57    pub pc_to_idx: Vec<u32>,
58    /// Valid basic-block starts (post-terminator PCs ∪ static branch targets).
59    pub basic_block_starts: Vec<bool>,
60    /// Gas-block start cost (indexed by PC). Only entries at gas-block
61    /// starts are meaningful; others are 0.
62    pub block_gas_costs: Vec<u32>,
63}
64
65/// Predecode a program: compute block starts, gas costs, and flat
66/// `DecodedInst` array.
67pub fn predecode(program: &PvmProgram) -> Predecoded {
68    let basic_block_starts = compute_basic_block_starts(&program.code, &program.bitmask);
69    let gas_block_starts = compute_gas_block_starts(&program.code, &program.bitmask);
70    let block_gas_costs = compute_block_gas_costs(
71        &program.code,
72        &program.bitmask,
73        &gas_block_starts,
74        program.mem_cycles,
75    );
76    let (decoded_insts, pc_to_idx) = predecode_instructions(
77        &program.code,
78        &program.bitmask,
79        &basic_block_starts,
80        &gas_block_starts,
81        &block_gas_costs,
82    );
83    Predecoded {
84        decoded_insts,
85        pc_to_idx,
86        basic_block_starts,
87        block_gas_costs,
88    }
89}
90
91// ---- Basic-block / gas-block start detection ----
92
93pub fn compute_basic_block_starts_with_skips(code: &[u8], bitmask: &[u8]) -> (Vec<bool>, Vec<u8>) {
94    compute_bb_starts_inner(code, bitmask)
95}
96
97pub fn compute_basic_block_starts(code: &[u8], bitmask: &[u8]) -> Vec<bool> {
98    compute_bb_starts_inner(code, bitmask).0
99}
100
101/// Compute gas block starts per the JAM spec: `{PC=0} ∪ {post-terminator PCs}`.
102///
103/// Unlike `compute_basic_block_starts`, this does NOT include branch
104/// targets. Gas blocks are defined solely by terminator boundaries.
105pub fn compute_gas_block_starts(code: &[u8], bitmask: &[u8]) -> Vec<bool> {
106    let len = code.len();
107    if len == 0 {
108        return vec![];
109    }
110    let mut starts = vec![false; len];
111
112    if !bitmask.is_empty() && bitmask[0] == 1 && Opcode::from_byte(code[0]).is_some() {
113        starts[0] = true;
114    }
115
116    let mut i = 0;
117    while i < len {
118        if i >= bitmask.len() || bitmask[i] != 1 {
119            i += 1;
120            continue;
121        }
122        let Some(op) = Opcode::from_byte(code[i]) else {
123            i += 1;
124            continue;
125        };
126
127        let skip = {
128            let mut s = 0;
129            for j in 0..25 {
130                let idx = i + 1 + j;
131                let bit = if idx < bitmask.len() { bitmask[idx] } else { 1 };
132                if bit == 1 {
133                    s = j;
134                    break;
135                }
136            }
137            s
138        };
139
140        if op.is_terminator() {
141            let next = i + 1 + skip;
142            if next < len && next < bitmask.len() && bitmask[next] == 1 {
143                starts[next] = true;
144            }
145        }
146        i += 1 + skip;
147    }
148
149    starts
150}
151
152fn compute_bb_starts_inner(code: &[u8], bitmask: &[u8]) -> (Vec<bool>, Vec<u8>) {
153    let len = code.len();
154    if len == 0 {
155        return (vec![], vec![]);
156    }
157    let mut starts = vec![false; len];
158    let mut skip_table = vec![0u8; len];
159
160    if !bitmask.is_empty() && bitmask[0] == 1 && Opcode::from_byte(code[0]).is_some() {
161        starts[0] = true;
162    }
163
164    let mut i = 0;
165    while i < len {
166        if i >= bitmask.len() || bitmask[i] != 1 {
167            i += 1;
168            continue;
169        }
170        let Some(op) = Opcode::from_byte(code[i]) else {
171            i += 1;
172            continue;
173        };
174
175        let skip = {
176            let mut s = 0;
177            for j in 0..25 {
178                let idx = i + 1 + j;
179                let bit = if idx < bitmask.len() { bitmask[idx] } else { 1 };
180                if bit == 1 {
181                    s = j;
182                    break;
183                }
184            }
185            s
186        };
187        skip_table[i] = skip as u8;
188
189        if op.is_terminator() {
190            let next = i + 1 + skip;
191            if next < len && next < bitmask.len() && bitmask[next] == 1 {
192                starts[next] = true;
193            }
194        }
195
196        // For static branch / jump targets, mark the target.
197        let cat = op.category();
198        match cat {
199            crate::instruction::InstructionCategory::OneOffset if i + 5 <= len => {
200                let off = i32::from_le_bytes([code[i + 1], code[i + 2], code[i + 3], code[i + 4]]);
201                let target = (i as i64 + off as i64) as usize;
202                if target < len && target < bitmask.len() && bitmask[target] == 1 {
203                    starts[target] = true;
204                }
205            }
206            crate::instruction::InstructionCategory::TwoRegOneOffset if i + 6 <= len => {
207                let off = i32::from_le_bytes([code[i + 2], code[i + 3], code[i + 4], code[i + 5]]);
208                let target = (i as i64 + off as i64) as usize;
209                if target < len && target < bitmask.len() && bitmask[target] == 1 {
210                    starts[target] = true;
211                }
212            }
213            crate::instruction::InstructionCategory::OneRegImmOffset if i + 2 <= len => {
214                let reg_byte = code[i + 1];
215                let lx = ((reg_byte as usize / 16) % 8).min(4);
216                let ly = if skip > lx + 1 {
217                    (skip - lx - 1).min(4)
218                } else {
219                    0
220                };
221                let off_start = i + 2 + lx;
222                if ly > 0 && off_start + ly <= len {
223                    let mut buf = [0u8; 4];
224                    buf[..ly].copy_from_slice(&code[off_start..off_start + ly]);
225                    if ly < 4 && buf[ly - 1] & 0x80 != 0 {
226                        for b in &mut buf[ly..4] {
227                            *b = 0xFF;
228                        }
229                    }
230                    let off = i32::from_le_bytes(buf);
231                    let target = (i as i64 + off as i64) as usize;
232                    if target < len && target < bitmask.len() && bitmask[target] == 1 {
233                        starts[target] = true;
234                    }
235                }
236            }
237            _ => {}
238        }
239
240        i += 1 + skip;
241    }
242
243    (starts, skip_table)
244}
245
246// ---- Per-block gas costs ----
247
248/// Compute gas cost per basic block using `GasSimulator`. Indexed by PC;
249/// only basic-block-start entries are meaningful.
250pub fn compute_block_gas_costs(
251    code: &[u8],
252    bitmask: &[u8],
253    basic_block_starts: &[bool],
254    mem_cycles: u8,
255) -> Vec<u32> {
256    use crate::gas_cost::{fast_cost_from_raw, skip_distance};
257    use crate::gas_sim::GasSimulator;
258
259    let len = code.len();
260    let mut costs = vec![0u32; len];
261    let mut sim = GasSimulator::new();
262    let mut block_start: usize = 0;
263    let mut in_block = false;
264
265    let mut pc = 0;
266    while pc < len {
267        if !basic_block_starts[pc] && !in_block {
268            pc += 1;
269            continue;
270        }
271
272        if basic_block_starts[pc] {
273            if in_block {
274                costs[block_start] = sim.flush_and_get_cost();
275                sim.reset();
276            }
277            block_start = pc;
278            in_block = true;
279        }
280
281        let opcode_byte = code[pc];
282        let raw_ra = if pc + 1 < len {
283            code[pc + 1] & 0x0F
284        } else {
285            0xFF
286        };
287        let raw_rb = if pc + 1 < len {
288            (code[pc + 1] >> 4) & 0x0F
289        } else {
290            0xFF
291        };
292        let raw_rd = if pc + 2 < len {
293            code[pc + 2] & 0x0F
294        } else {
295            0xFF
296        };
297
298        let fc = fast_cost_from_raw(
299            opcode_byte,
300            raw_ra,
301            raw_rb,
302            raw_rd,
303            pc as u32,
304            code,
305            bitmask,
306            mem_cycles,
307        );
308        sim.feed(&fc);
309
310        let skip = skip_distance(bitmask, pc);
311        pc += 1 + skip;
312    }
313
314    if in_block {
315        costs[block_start] = sim.flush_and_get_cost();
316    }
317
318    costs
319}
320
321// ---- Predecode ----
322
323fn flatten_args(args: &Args) -> (u8, u8, u8, u64, u64) {
324    match *args {
325        Args::None => (0, 0, 0, 0, 0),
326        Args::Imm { imm } => (0, 0, 0, imm, 0),
327        Args::RegExtImm { ra, imm } => (ra as u8, 0, 0, imm, 0),
328        Args::TwoImm { imm_x, imm_y } => (0, 0, 0, imm_x, imm_y),
329        Args::Offset { offset } => (0, 0, 0, offset, 0),
330        Args::RegImm { ra, imm } => (ra as u8, 0, 0, imm, 0),
331        Args::RegTwoImm { ra, imm_x, imm_y } => (ra as u8, 0, 0, imm_x, imm_y),
332        Args::RegImmOffset { ra, imm, offset } => (ra as u8, 0, 0, imm, offset),
333        Args::TwoReg { rd, ra } => (ra as u8, 0, rd as u8, 0, 0),
334        Args::TwoRegImm { ra, rb, imm } => (ra as u8, rb as u8, 0, imm, 0),
335        Args::TwoRegOffset { ra, rb, offset } => (ra as u8, rb as u8, 0, offset, 0),
336        Args::TwoRegTwoImm {
337            ra,
338            rb,
339            imm_x,
340            imm_y,
341        } => (ra as u8, rb as u8, 0, imm_x, imm_y),
342        Args::ThreeReg { ra, rb, rd } => (ra as u8, rb as u8, rd as u8, 0, 0),
343    }
344}
345
346/// Pre-decode all instructions into a flat array for fast execution.
347fn predecode_instructions(
348    code: &[u8],
349    bitmask: &[u8],
350    basic_block_starts: &[bool],
351    gas_block_starts: &[bool],
352    block_gas_costs: &[u32],
353) -> (Vec<DecodedInst>, Vec<u32>) {
354    let len = code.len();
355    let mut insts = Vec::new();
356    let mut pc_to_idx = vec![u32::MAX; len + 1];
357
358    let skip_at = |i: usize| -> usize {
359        for j in 0..25 {
360            let idx = i + 1 + j;
361            let bit = if idx < bitmask.len() { bitmask[idx] } else { 1 };
362            if bit == 1 {
363                return j;
364            }
365        }
366        24
367    };
368
369    let mut pc = 0;
370    while pc < len {
371        #[allow(clippy::collapsible_if)]
372        if pc < bitmask.len() && bitmask[pc] == 1 {
373            if let Some(opcode) = Opcode::from_byte(code[pc]) {
374                let skip = skip_at(pc);
375                let next_pc = (pc + 1 + skip) as u32;
376                let category = opcode.category();
377                let args = args::decode_args(code, pc, skip, category);
378                let bb_gas_cost = if pc < gas_block_starts.len() && gas_block_starts[pc] {
379                    block_gas_costs[pc]
380                } else {
381                    0
382                };
383
384                let (ra, rb, rd, imm1, imm2) = flatten_args(&args);
385
386                let idx = insts.len() as u32;
387                pc_to_idx[pc] = idx;
388                insts.push(DecodedInst {
389                    opcode,
390                    ra,
391                    rb,
392                    rd,
393                    imm1,
394                    imm2,
395                    pc: pc as u32,
396                    next_pc,
397                    next_idx: u32::MAX,
398                    target_idx: u32::MAX,
399                    bb_gas_cost,
400                });
401
402                pc = next_pc as usize;
403                continue;
404            }
405        }
406        pc += 1;
407    }
408
409    let sentinel_idx = insts.len() as u32;
410    // Sentinel trap at end so sequential advance past last insn doesn't OOB.
411    insts.push(DecodedInst {
412        opcode: Opcode::Trap,
413        ra: 0,
414        rb: 0,
415        rd: 0,
416        imm1: 0,
417        imm2: 0,
418        pc: len as u32,
419        next_pc: len as u32 + 1,
420        next_idx: sentinel_idx,
421        target_idx: u32::MAX,
422        bb_gas_cost: 1,
423    });
424
425    // Second pass: resolve next_idx and target_idx.
426    #[allow(clippy::needless_range_loop)]
427    for i in 0..insts.len() {
428        let inst = &insts[i];
429        let np = inst.next_pc as usize;
430        let next_idx = if np < pc_to_idx.len() {
431            let ni = pc_to_idx[np];
432            if ni != u32::MAX { ni } else { sentinel_idx }
433        } else {
434            sentinel_idx
435        };
436
437        let target_idx = {
438            let op = inst.opcode;
439            let target_from_imm1 = matches!(
440                op,
441                Opcode::Jump
442                    | Opcode::BranchEq
443                    | Opcode::BranchNe
444                    | Opcode::BranchLtU
445                    | Opcode::BranchLtS
446                    | Opcode::BranchGeU
447                    | Opcode::BranchGeS
448            );
449            let target_from_imm2 = matches!(
450                op,
451                Opcode::LoadImmJump
452                    | Opcode::BranchEqImm
453                    | Opcode::BranchNeImm
454                    | Opcode::BranchLtUImm
455                    | Opcode::BranchLeUImm
456                    | Opcode::BranchGeUImm
457                    | Opcode::BranchGtUImm
458                    | Opcode::BranchLtSImm
459                    | Opcode::BranchLeSImm
460                    | Opcode::BranchGeSImm
461                    | Opcode::BranchGtSImm
462            );
463            let target_pc_opt = if target_from_imm1 {
464                Some(inst.imm1 as usize)
465            } else if target_from_imm2 {
466                Some(inst.imm2 as usize)
467            } else {
468                None
469            };
470            if let Some(target_pc) = target_pc_opt {
471                if target_pc < basic_block_starts.len()
472                    && basic_block_starts[target_pc]
473                    && target_pc < pc_to_idx.len()
474                {
475                    pc_to_idx[target_pc]
476                } else {
477                    u32::MAX
478                }
479            } else {
480                u32::MAX
481            }
482        };
483
484        insts[i].next_idx = next_idx;
485        insts[i].target_idx = target_idx;
486    }
487
488    (insts, pc_to_idx)
489}
490
491#[cfg(test)]
492mod tests {
493    use super::*;
494
495    #[test]
496    fn decoded_inst_is_40_bytes() {
497        assert_eq!(core::mem::size_of::<DecodedInst>(), 40);
498    }
499
500    #[test]
501    fn predecode_empty_program() {
502        let prog = PvmProgram::new(vec![], vec![], vec![], 25).unwrap();
503        let p = predecode(&prog);
504        // Just the sentinel.
505        assert_eq!(p.decoded_insts.len(), 1);
506        assert_eq!(p.decoded_insts[0].opcode, Opcode::Trap);
507    }
508
509    #[test]
510    fn predecode_single_trap() {
511        // Opcode 0 (Trap), 1-byte instruction.
512        let prog = PvmProgram::new(vec![0u8], vec![1u8], vec![], 25).unwrap();
513        let p = predecode(&prog);
514        // One real + one sentinel.
515        assert_eq!(p.decoded_insts.len(), 2);
516        assert_eq!(p.decoded_insts[0].opcode, Opcode::Trap);
517        assert_eq!(p.decoded_insts[1].opcode, Opcode::Trap);
518        // PC 0 is a basic-block start (and gas-block start).
519        assert!(p.basic_block_starts[0]);
520        assert!(p.block_gas_costs[0] >= 1);
521    }
522
523    #[test]
524    fn predecode_pc_to_idx() {
525        // Two 1-byte traps.
526        let prog = PvmProgram::new(vec![0u8, 0], vec![1u8, 1], vec![], 25).unwrap();
527        let p = predecode(&prog);
528        assert_eq!(p.pc_to_idx[0], 0);
529        assert_eq!(p.pc_to_idx[1], 1);
530    }
531
532    #[test]
533    fn gas_block_starts_excludes_non_terminators() {
534        // Three 1-byte traps: only PC=0 is a "block start" technically;
535        // but post-terminator PCs are also gas-block starts. Trap is a
536        // terminator (per Opcode::is_terminator), so the byte after each
537        // trap that has bitmask=1 is also a gas-block start.
538        let prog = PvmProgram::new(vec![0u8, 0, 0], vec![1u8, 1, 1], vec![], 25).unwrap();
539        let starts = compute_gas_block_starts(&prog.code, &prog.bitmask);
540        assert!(starts[0]);
541        assert!(starts[1]); // post-Trap
542        assert!(starts[2]); // post-Trap
543    }
544}