Skip to main content

javm_exec/
instruction.rs

1//! PVM instruction set (JAM Gray Paper Appendix A.5).
2//!
3//! Cherry-picked verbatim from v2 `javm/src/instruction.rs`. Pure
4//! opcode enumeration + lookup tables; no cap awareness.
5
/// PVM opcodes (ζᵢ values from Appendix A.5).
///
/// Organized by instruction category matching the spec sections.
///
/// The enum is `#[repr(u8)]` and every variant has an explicit discriminant,
/// so `opcode as u8` yields exactly the byte written next to the variant.
/// Assigned values are grouped by argument format, with unassigned gaps
/// between groups (for example 4..=9 and 11..=19).
///
/// NOTE: the discriminants below must stay in sync with `OPCODE_TABLE`.
/// `Opcode::from_byte` transmutes any byte that table marks as valid, so a
/// byte flagged valid there without a matching variant here would be
/// undefined behavior.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Opcode {
    // A.5.1: No arguments (bytes 0..=3)
    Trap = 0,
    Fallthrough = 1,
    Unlikely = 2,
    /// Management ops + dynamic CALL. φ\[11\]=op, φ\[12\]=subject|object.
    Ecall = 3,

    // A.5.2: One immediate (byte 10)
    Ecalli = 10,

    // A.5.3: One register + extended width immediate (byte 20)
    LoadImm64 = 20,

    // A.5.4: Two immediates (bytes 30..=33)
    StoreImmU8 = 30,
    StoreImmU16 = 31,
    StoreImmU32 = 32,
    StoreImmU64 = 33,

    // A.5.5: One offset (byte 40)
    Jump = 40,

    // A.5.6: One register + one immediate (bytes 50..=62)
    JumpInd = 50,
    LoadImm = 51,
    LoadU8 = 52,
    LoadI8 = 53,
    LoadU16 = 54,
    LoadI16 = 55,
    LoadU32 = 56,
    LoadI32 = 57,
    LoadU64 = 58,
    StoreU8 = 59,
    StoreU16 = 60,
    StoreU32 = 61,
    StoreU64 = 62,

    // A.5.7: One register + two immediates (bytes 70..=73)
    StoreImmIndU8 = 70,
    StoreImmIndU16 = 71,
    StoreImmIndU32 = 72,
    StoreImmIndU64 = 73,

    // A.5.8: One register + one immediate + one offset (bytes 80..=90)
    LoadImmJump = 80,
    BranchEqImm = 81,
    BranchNeImm = 82,
    BranchLtUImm = 83,
    BranchLeUImm = 84,
    BranchGeUImm = 85,
    BranchGtUImm = 86,
    BranchLtSImm = 87,
    BranchLeSImm = 88,
    BranchGeSImm = 89,
    BranchGtSImm = 90,

    // A.5.9: Two registers (bytes 100..=111)
    MoveReg = 100,
    Sbrk = 101,
    CountSetBits64 = 102,
    CountSetBits32 = 103,
    LeadingZeroBits64 = 104,
    LeadingZeroBits32 = 105,
    TrailingZeroBits64 = 106,
    TrailingZeroBits32 = 107,
    SignExtend8 = 108,
    SignExtend16 = 109,
    ZeroExtend16 = 110,
    ReverseBytes = 111,

    // A.5.10: Two registers + one immediate (bytes 120..=161)
    StoreIndU8 = 120,
    StoreIndU16 = 121,
    StoreIndU32 = 122,
    StoreIndU64 = 123,
    LoadIndU8 = 124,
    LoadIndI8 = 125,
    LoadIndU16 = 126,
    LoadIndI16 = 127,
    LoadIndU32 = 128,
    LoadIndI32 = 129,
    LoadIndU64 = 130,
    AddImm32 = 131,
    AndImm = 132,
    XorImm = 133,
    OrImm = 134,
    MulImm32 = 135,
    SetLtUImm = 136,
    SetLtSImm = 137,
    ShloLImm32 = 138,
    ShloRImm32 = 139,
    SharRImm32 = 140,
    NegAddImm32 = 141,
    SetGtUImm = 142,
    SetGtSImm = 143,
    ShloLImmAlt32 = 144,
    ShloRImmAlt32 = 145,
    SharRImmAlt32 = 146,
    CmovIzImm = 147,
    CmovNzImm = 148,
    AddImm64 = 149,
    MulImm64 = 150,
    ShloLImm64 = 151,
    ShloRImm64 = 152,
    SharRImm64 = 153,
    NegAddImm64 = 154,
    ShloLImmAlt64 = 155,
    ShloRImmAlt64 = 156,
    SharRImmAlt64 = 157,
    RotR64Imm = 158,
    RotR64ImmAlt = 159,
    RotR32Imm = 160,
    RotR32ImmAlt = 161,

    // A.5.11: Two registers + one offset (bytes 170..=175)
    BranchEq = 170,
    BranchNe = 171,
    BranchLtU = 172,
    BranchLtS = 173,
    BranchGeU = 174,
    BranchGeS = 175,

    // A.5.12: Two registers + two immediates (byte 180)
    LoadImmJumpInd = 180,

    // A.5.13: Three registers (bytes 190..=230)
    Add32 = 190,
    Sub32 = 191,
    Mul32 = 192,
    DivU32 = 193,
    DivS32 = 194,
    RemU32 = 195,
    RemS32 = 196,
    ShloL32 = 197,
    ShloR32 = 198,
    SharR32 = 199,
    Add64 = 200,
    Sub64 = 201,
    Mul64 = 202,
    DivU64 = 203,
    DivS64 = 204,
    RemU64 = 205,
    RemS64 = 206,
    ShloL64 = 207,
    ShloR64 = 208,
    SharR64 = 209,
    And = 210,
    Xor = 211,
    Or = 212,
    MulUpperSS = 213,
    MulUpperUU = 214,
    MulUpperSU = 215,
    SetLtU = 216,
    SetLtS = 217,
    CmovIz = 218,
    CmovNz = 219,
    RotL64 = 220,
    RotL32 = 221,
    RotR64 = 222,
    RotR32 = 223,
    AndInv = 224,
    OrInv = 225,
    Xnor = 226,
    Max = 227,
    MaxU = 228,
    Min = 229,
    MinU = 230,
}
180
/// Opcode validity map for O(1) validation: `OPCODE_TABLE[byte]` is 1 when
/// `byte` is an assigned opcode and 0 otherwise.
static OPCODE_TABLE: [u8; 256] = {
    // Inclusive (first, last) byte ranges of assigned opcodes, one entry per
    // A.5 argument-format group.
    const ASSIGNED: [(usize, usize); 13] = [
        (0, 3),
        (10, 10),
        (20, 20),
        (30, 33),
        (40, 40),
        (50, 62),
        (70, 73),
        (80, 90),
        (100, 111),
        (120, 161),
        (170, 175),
        (180, 180),
        (190, 230),
    ];

    let mut table = [0u8; 256];
    let mut group = 0;
    // `while` loops because `for` is not available in const evaluation.
    while group < ASSIGNED.len() {
        let (first, last) = ASSIGNED[group];
        let mut byte = first;
        while byte <= last {
            table[byte] = 1;
            byte += 1;
        }
        group += 1;
    }
    table
};
201
202impl Opcode {
203    /// Try to decode an opcode from a byte (eq A.19). O(1) lookup.
204    #[inline(always)]
205    pub fn from_byte(byte: u8) -> Option<Self> {
206        if OPCODE_TABLE[byte as usize] != 0 {
207            // SAFETY: we verified the byte is a valid opcode via lookup table
208            Some(unsafe { core::mem::transmute::<u8, Opcode>(byte) })
209        } else {
210            None
211        }
212    }
213
214    /// Instruction category determining the argument format.
215    pub fn category(self) -> InstructionCategory {
216        let b = self as u8;
217        match b {
218            0..=3 => InstructionCategory::NoArgs,
219            10 => InstructionCategory::OneImm,
220            20 => InstructionCategory::OneRegExtImm,
221            30..=33 => InstructionCategory::TwoImm,
222            40 => InstructionCategory::OneOffset,
223            50..=62 => InstructionCategory::OneRegOneImm,
224            70..=73 => InstructionCategory::OneRegTwoImm,
225            80..=90 => InstructionCategory::OneRegImmOffset,
226            100..=111 => InstructionCategory::TwoReg,
227            120..=161 => InstructionCategory::TwoRegOneImm,
228            170..=175 => InstructionCategory::TwoRegOneOffset,
229            180 => InstructionCategory::TwoRegTwoImm,
230            190..=230 => InstructionCategory::ThreeReg,
231            _ => InstructionCategory::NoArgs, // unreachable for valid opcodes
232        }
233    }
234
235    /// Gas cost for this instruction (ϱ∆). All instructions cost 1.
236    pub fn gas_cost(self) -> u64 {
237        1
238    }
239
240    /// Whether this opcode is a basic-block termination instruction (set T).
241    pub fn is_terminator(self) -> bool {
242        matches!(
243            self,
244            Opcode::Trap
245                | Opcode::Fallthrough
246                | Opcode::Unlikely
247                | Opcode::Ecall
248                | Opcode::Ecalli
249                | Opcode::Jump
250                | Opcode::JumpInd
251                | Opcode::LoadImmJump
252                | Opcode::LoadImmJumpInd
253                | Opcode::BranchEq
254                | Opcode::BranchNe
255                | Opcode::BranchLtU
256                | Opcode::BranchLtS
257                | Opcode::BranchGeU
258                | Opcode::BranchGeS
259                | Opcode::BranchEqImm
260                | Opcode::BranchNeImm
261                | Opcode::BranchLtUImm
262                | Opcode::BranchLtSImm
263                | Opcode::BranchLeUImm
264                | Opcode::BranchLeSImm
265                | Opcode::BranchGeUImm
266                | Opcode::BranchGeSImm
267                | Opcode::BranchGtUImm
268                | Opcode::BranchGtSImm
269        )
270    }
271}
272
/// Operand layout of an instruction, one variant per Appendix A.5 subsection.
///
/// The decoder uses this to decide how the bytes following an opcode are
/// split into registers, immediates and offsets.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum InstructionCategory {
    /// A.5.1 — no operands (e.g. trap, fallthrough); opcodes 0..=3.
    NoArgs,
    /// A.5.2 — a single immediate (ecalli); opcode 10.
    OneImm,
    /// A.5.3 — one register plus an extended-width immediate (load_imm_64);
    /// opcode 20.
    OneRegExtImm,
    /// A.5.4 — two immediates (store_imm_*); opcodes 30..=33.
    TwoImm,
    /// A.5.5 — a single offset (jump); opcode 40.
    OneOffset,
    /// A.5.6 — one register plus one immediate; opcodes 50..=62.
    OneRegOneImm,
    /// A.5.7 — one register plus two immediates; opcodes 70..=73.
    OneRegTwoImm,
    /// A.5.8 — one register, one immediate and one offset; opcodes 80..=90.
    OneRegImmOffset,
    /// A.5.9 — two registers; opcodes 100..=111.
    TwoReg,
    /// A.5.10 — two registers plus one immediate; opcodes 120..=161.
    TwoRegOneImm,
    /// A.5.11 — two registers plus one offset; opcodes 170..=175.
    TwoRegOneOffset,
    /// A.5.12 — two registers plus two immediates; opcode 180.
    TwoRegTwoImm,
    /// A.5.13 — three registers; opcodes 190..=230.
    ThreeReg,
}
303
304/// Pre-computed lookup table: opcode byte → InstructionCategory.
305/// Eliminates the match in `Opcode::category()` from the hot compilation loop.
306/// Invalid opcodes map to NoArgs (same as the fallback in category()).
307static CATEGORY_LUT: [InstructionCategory; 256] = {
308    let mut t = [InstructionCategory::NoArgs; 256];
309    // OneImm
310    t[10] = InstructionCategory::OneImm;
311    // OneRegExtImm
312    t[20] = InstructionCategory::OneRegExtImm;
313    // TwoImm
314    t[30] = InstructionCategory::TwoImm;
315    t[31] = InstructionCategory::TwoImm;
316    t[32] = InstructionCategory::TwoImm;
317    t[33] = InstructionCategory::TwoImm;
318    // OneOffset
319    t[40] = InstructionCategory::OneOffset;
320    // OneRegOneImm
321    let mut i = 50;
322    while i <= 62 {
323        t[i] = InstructionCategory::OneRegOneImm;
324        i += 1;
325    }
326    // OneRegTwoImm
327    i = 70;
328    while i <= 73 {
329        t[i] = InstructionCategory::OneRegTwoImm;
330        i += 1;
331    }
332    // OneRegImmOffset
333    i = 80;
334    while i <= 90 {
335        t[i] = InstructionCategory::OneRegImmOffset;
336        i += 1;
337    }
338    // TwoReg
339    i = 100;
340    while i <= 111 {
341        t[i] = InstructionCategory::TwoReg;
342        i += 1;
343    }
344    // TwoRegOneImm
345    i = 120;
346    while i <= 161 {
347        t[i] = InstructionCategory::TwoRegOneImm;
348        i += 1;
349    }
350    // TwoRegOneOffset
351    i = 170;
352    while i <= 175 {
353        t[i] = InstructionCategory::TwoRegOneOffset;
354        i += 1;
355    }
356    // TwoRegTwoImm
357    t[180] = InstructionCategory::TwoRegTwoImm;
358    // ThreeReg
359    i = 190;
360    while i <= 230 {
361        t[i] = InstructionCategory::ThreeReg;
362        i += 1;
363    }
364    t
365};
366
367impl InstructionCategory {
368    /// Look up category from raw opcode byte via static table (O(1), no branching).
369    #[inline(always)]
370    pub fn from_opcode_byte(b: u8) -> Self {
371        CATEGORY_LUT[b as usize]
372    }
373}
374
375/// Combined opcode validation + category lookup in a single array access.
376/// Returns (is_valid, category) packed into a u8: high bit = valid, low 4 bits = category.
377static OPCODE_COMBINED: [u8; 256] = {
378    let mut t = [0u8; 256]; // 0 = invalid
379    // Build from OPCODE_TABLE (valid opcodes) and CATEGORY_LUT
380    let mut i = 0;
381    while i < 256 {
382        if OPCODE_TABLE[i] != 0 {
383            t[i] = 0x80 | (CATEGORY_LUT[i] as u8); // bit 7 = valid, low bits = category
384        }
385        i += 1;
386    }
387    t
388};
389
390/// Look up opcode validity and category in a single array access.
391/// Returns None for invalid opcodes, Some((Opcode, InstructionCategory)) for valid ones.
392#[inline(always)]
393pub fn decode_opcode_fast(b: u8) -> Option<(Opcode, InstructionCategory)> {
394    let entry = OPCODE_COMBINED[b as usize];
395    if entry & 0x80 != 0 {
396        // SAFETY: b is a valid Opcode discriminant — OPCODE_COMBINED[b] has bit 7 set
397        // only for bytes that correspond to defined Opcode variants.
398        let opcode = unsafe { core::mem::transmute::<u8, Opcode>(b) };
399        let category = CATEGORY_LUT[b as usize];
400        Some((opcode, category))
401    } else {
402        None
403    }
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-check that assigned bytes decode to the expected variants.
    #[test]
    fn test_valid_opcodes() {
        assert_eq!(Opcode::from_byte(0), Some(Opcode::Trap));
        assert_eq!(Opcode::from_byte(1), Some(Opcode::Fallthrough));
        assert_eq!(Opcode::from_byte(2), Some(Opcode::Unlikely)); // JAR v0.8.0
        assert_eq!(Opcode::from_byte(3), Some(Opcode::Ecall));
        assert_eq!(Opcode::from_byte(10), Some(Opcode::Ecalli));
        assert_eq!(Opcode::from_byte(40), Some(Opcode::Jump));
        assert_eq!(Opcode::from_byte(200), Some(Opcode::Add64));
        assert_eq!(Opcode::from_byte(230), Some(Opcode::MinU));
    }

    /// Bytes in the gaps between groups and past the last opcode are rejected.
    #[test]
    fn test_invalid_opcodes() {
        assert_eq!(Opcode::from_byte(4), None);
        assert_eq!(Opcode::from_byte(15), None);
        assert_eq!(Opcode::from_byte(231), None);
        assert_eq!(Opcode::from_byte(255), None);
    }

    /// One representative opcode per argument-format category.
    #[test]
    fn test_categories() {
        assert_eq!(Opcode::Trap.category(), InstructionCategory::NoArgs);
        assert_eq!(Opcode::Ecalli.category(), InstructionCategory::OneImm);
        assert_eq!(
            Opcode::LoadImm64.category(),
            InstructionCategory::OneRegExtImm
        );
        assert_eq!(Opcode::StoreImmU8.category(), InstructionCategory::TwoImm);
        assert_eq!(Opcode::Jump.category(), InstructionCategory::OneOffset);
        assert_eq!(
            Opcode::LoadImm.category(),
            InstructionCategory::OneRegOneImm
        );
        assert_eq!(
            Opcode::StoreImmIndU8.category(),
            InstructionCategory::OneRegTwoImm
        );
        assert_eq!(
            Opcode::LoadImmJump.category(),
            InstructionCategory::OneRegImmOffset
        );
        assert_eq!(Opcode::MoveReg.category(), InstructionCategory::TwoReg);
        assert_eq!(
            Opcode::AddImm32.category(),
            InstructionCategory::TwoRegOneImm
        );
        assert_eq!(
            Opcode::BranchEq.category(),
            InstructionCategory::TwoRegOneOffset
        );
        assert_eq!(
            Opcode::LoadImmJumpInd.category(),
            InstructionCategory::TwoRegTwoImm
        );
        assert_eq!(Opcode::Add64.category(), InstructionCategory::ThreeReg);
    }

    /// Every byte that decodes must round-trip back to the same byte.
    #[test]
    fn test_from_byte_round_trip() {
        for b in 0..=u8::MAX {
            if let Some(op) = Opcode::from_byte(b) {
                assert_eq!(op as u8, b);
            }
        }
    }

    /// The table-driven fast paths must agree with `from_byte` + `category`
    /// for all 256 byte values.
    #[test]
    fn test_fast_paths_match_reference_decoding() {
        for b in 0..=u8::MAX {
            let reference = Opcode::from_byte(b).map(|op| (op, op.category()));
            assert_eq!(decode_opcode_fast(b), reference, "byte {}", b);

            match reference {
                Some((_, category)) => {
                    assert_eq!(InstructionCategory::from_opcode_byte(b), category);
                }
                // Unassigned bytes fall back to `NoArgs` in the category table.
                None => assert_eq!(
                    InstructionCategory::from_opcode_byte(b),
                    InstructionCategory::NoArgs
                ),
            }
        }
    }

    /// Sample terminators and non-terminators from different groups.
    #[test]
    fn test_terminators() {
        assert!(Opcode::Trap.is_terminator());
        assert!(Opcode::Ecalli.is_terminator());
        assert!(Opcode::Jump.is_terminator());
        assert!(Opcode::BranchEq.is_terminator());
        assert!(Opcode::BranchGtSImm.is_terminator());
        assert!(Opcode::LoadImmJumpInd.is_terminator());

        assert!(!Opcode::LoadImm.is_terminator());
        assert!(!Opcode::Sbrk.is_terminator());
        assert!(!Opcode::Add64.is_terminator());
    }

    /// Gas is flat: every instruction costs one unit.
    #[test]
    fn test_gas_cost_is_flat() {
        assert_eq!(Opcode::Trap.gas_cost(), 1);
        assert_eq!(Opcode::MinU.gas_cost(), 1);
    }
}