// javm_transpiler/assembler.rs
//! PVM program assembler — hand-craft PVM bytecode programs.
//!
//! Provides a builder API that emits individual PVM instructions
//! (opcode + register operands + immediate encoding). It is used by
//! unit tests for the opcode encoding tables. Producing full chain
//! Images happens via [`crate::link_elf`]; this module does not
//! emit blobs.
8
/// PVM register indices (0-12).
///
/// Each variant's discriminant is the raw value the assembler writes into an
/// instruction's register operand (`reg as u8`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Reg {
    RA = 0,  // Return address / reg 0
    SP = 1,  // Stack pointer / reg 1
    T0 = 2,  // Temporary 0
    T1 = 3,  // Temporary 1
    T2 = 4,  // Temporary 2
    S0 = 5,  // Saved 0
    S1 = 6,  // Saved 1
    A0 = 7,  // Argument 0 (also host-call arg/return)
    A1 = 8,  // Argument 1
    A2 = 9,  // Argument 2
    A3 = 10, // Argument 3
    A4 = 11, // Argument 4
    A5 = 12, // Argument 5
}

/// PVM program assembler.
///
/// Accumulates instruction bytes in `code` and keeps `bitmask` in lock-step
/// with it: one entry per code byte, `1` for a byte that starts an
/// instruction and `0` for every operand byte.
#[derive(Debug, Default)]
pub struct Assembler {
    code: Vec<u8>,
    bitmask: Vec<u8>,
    jump_table: Vec<u32>,
    /// Labels: name → code offset
    labels: std::collections::HashMap<String, u32>,
    /// Pending fixups: (code_offset, label_name, fixup_size)
    ///
    /// None of the methods below push to or consume this list.
    _fixups: Vec<(usize, String, u8)>,
}

impl Assembler {
    /// Width in bytes of every immediate and offset field written by the
    /// instruction builders; they always use the fixed 4-byte little-endian
    /// form rather than a shortened encoding.
    const IMM_BYTES: u8 = 4;

    /// Create an empty assembler (no code, no jump table entries, no labels).
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a jump table entry pointing to the current code offset.
    /// Returns the jump table index.
    ///
    /// # Panics
    ///
    /// Panics if the current code length does not fit in a `u32`
    /// (see [`Assembler::current_offset`]).
    pub fn add_jump_entry(&mut self) -> usize {
        let here = self.current_offset();
        self.add_jump_entry_at(here)
    }

    /// Add a jump table entry pointing to a specific code offset.
    /// Returns the jump table index. `offset` is stored as given; it is not
    /// checked against the code emitted so far.
    pub fn add_jump_entry_at(&mut self, offset: u32) -> usize {
        let index = self.jump_table.len();
        self.jump_table.push(offset);
        index
    }

    /// Get the current code offset (the number of code bytes emitted so far).
    ///
    /// # Panics
    ///
    /// Panics if more than `u32::MAX` bytes of code have been emitted.
    pub fn current_offset(&self) -> u32 {
        // Checked instead of `as u32`: a silently wrapped offset would alias
        // an earlier code position in labels and jump table entries.
        <u32 as std::convert::TryFrom<usize>>::try_from(self.code.len())
            .expect("PVM code offset exceeds u32::MAX")
    }

    /// Define a label at the current code position.
    ///
    /// Defining the same name again replaces the previously recorded offset.
    ///
    /// # Panics
    ///
    /// Panics if the current code length does not fit in a `u32`.
    pub fn label(&mut self, name: &str) -> &mut Self {
        let here = self.current_offset();
        self.labels.insert(name.to_string(), here);
        self
    }

    // ===== No-argument instructions =====

    /// Opcode 0: Trap (halt with error)
    pub fn trap(&mut self) -> &mut Self {
        self.emit_byte(0, true);
        self
    }

    /// Opcode 1: Fallthrough (nop, continue to next instruction)
    pub fn fallthrough(&mut self) -> &mut Self {
        self.emit_byte(1, true);
        self
    }

    // ===== One immediate instructions =====

    /// Opcode 10: ecalli (host call with immediate ID)
    ///
    /// Encoded as the opcode followed by `id` as 4 little-endian bytes.
    pub fn ecalli(&mut self, id: u32) -> &mut Self {
        self.emit_byte(10, true);
        self.emit_imm(i64::from(id), Self::IMM_BYTES);
        self
    }

    // ===== One register + extended immediate =====

    /// Opcode 20: load_imm_64 (load 64-bit immediate into register)
    ///
    /// Encoded as opcode, register byte, then `imm` as 8 little-endian bytes.
    pub fn load_imm_64(&mut self, rd: Reg, imm: u64) -> &mut Self {
        self.emit_byte(20, true);
        self.emit_byte(rd as u8, false);
        self.emit_operands(&imm.to_le_bytes());
        self
    }

    // ===== One offset instructions =====

    /// Opcode 40: jump (unconditional jump to offset)
    ///
    /// `target` is written verbatim as a 4-byte little-endian field; this
    /// method performs no label lookup and no adjustment of the value.
    pub fn jump(&mut self, target: u32) -> &mut Self {
        self.emit_byte(40, true);
        self.emit_imm(i64::from(target), Self::IMM_BYTES);
        self
    }

    // ===== One register + one immediate =====

    /// Opcode 50: jump_ind (indirect jump through register + immediate)
    pub fn jump_ind(&mut self, rd: Reg, imm: u32) -> &mut Self {
        self.emit_reg_imm(50, rd, i64::from(imm))
    }

    /// Opcode 51: load_imm (load sign-extended immediate into register)
    pub fn load_imm(&mut self, rd: Reg, imm: i32) -> &mut Self {
        self.emit_reg_imm(51, rd, i64::from(imm))
    }

    /// Opcode 52: load_u8 (load u8 from address in immediate)
    pub fn load_u8(&mut self, rd: Reg, addr: u32) -> &mut Self {
        self.emit_reg_imm(52, rd, i64::from(addr))
    }

    /// Opcode 58: load_u64 (load u64 from address in immediate)
    pub fn load_u64(&mut self, rd: Reg, addr: u32) -> &mut Self {
        self.emit_reg_imm(58, rd, i64::from(addr))
    }

    /// Opcode 59: store_u8 (store u8 from register to address)
    pub fn store_u8(&mut self, rd: Reg, addr: u32) -> &mut Self {
        self.emit_reg_imm(59, rd, i64::from(addr))
    }

    /// Opcode 62: store_u64 (store u64 from register to address)
    pub fn store_u64(&mut self, rd: Reg, addr: u32) -> &mut Self {
        self.emit_reg_imm(62, rd, i64::from(addr))
    }

    // ===== One register + one immediate + one offset =====

    /// Opcode 80: load_imm_jump (load immediate into register and jump)
    pub fn load_imm_jump(&mut self, rd: Reg, imm: i32, target: u32) -> &mut Self {
        self.emit_reg_imm_offset(80, rd, imm, target)
    }

    /// Opcode 81: branch_eq_imm (branch if register == immediate)
    pub fn branch_eq_imm(&mut self, rd: Reg, imm: i32, target: u32) -> &mut Self {
        self.emit_reg_imm_offset(81, rd, imm, target)
    }

    /// Opcode 82: branch_ne_imm (branch if register != immediate)
    pub fn branch_ne_imm(&mut self, rd: Reg, imm: i32, target: u32) -> &mut Self {
        self.emit_reg_imm_offset(82, rd, imm, target)
    }

    /// Opcode 83: branch_lt_u_imm (branch if register < unsigned immediate)
    pub fn branch_lt_u_imm(&mut self, rd: Reg, imm: i32, target: u32) -> &mut Self {
        self.emit_reg_imm_offset(83, rd, imm, target)
    }

    // ===== Two register instructions =====

    /// Opcode 100: move_reg (copy register)
    ///
    /// Register byte: `rd` in the low nibble, `ra` in the high nibble.
    pub fn move_reg(&mut self, rd: Reg, ra: Reg) -> &mut Self {
        self.emit_two_reg(100, rd, ra)
    }

    // ===== Two register + one immediate =====

    /// Opcode 124: load_ind_u8 (load u8 from [rA + imm] into rD)
    pub fn load_ind_u8(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(124, rd, ra, imm)
    }

    /// Opcode 128: load_ind_u32 (load u32 from [rA + imm] into rD)
    pub fn load_ind_u32(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(128, rd, ra, imm)
    }

    /// Opcode 130: load_ind_u64 (load u64 from [rA + imm] into rD)
    pub fn load_ind_u64(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(130, rd, ra, imm)
    }

    /// Opcode 120: store_ind_u8 (store u8 from rD to [rA + imm])
    pub fn store_ind_u8(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(120, rd, ra, imm)
    }

    /// Opcode 122: store_ind_u32 (store u32 from rD to [rA + imm])
    pub fn store_ind_u32(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(122, rd, ra, imm)
    }

    /// Opcode 123: store_ind_u64 (store u64 from rD to [rA + imm])
    pub fn store_ind_u64(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(123, rd, ra, imm)
    }

    /// Opcode 131: add_imm_32 (rD = rA + imm, 32-bit)
    pub fn add_imm_32(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(131, rd, ra, imm)
    }

    /// Opcode 149: add_imm_64 (rD = rA + imm, 64-bit)
    pub fn add_imm_64(&mut self, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg_imm(149, rd, ra, imm)
    }

    // ===== Three register instructions =====

    /// Opcode 200: add_64 (rD = rA + rB)
    pub fn add_64(&mut self, rd: Reg, ra: Reg, rb: Reg) -> &mut Self {
        self.emit_three_reg(200, rd, ra, rb)
    }

    /// Opcode 201: sub_64 (rD = rA - rB)
    pub fn sub_64(&mut self, rd: Reg, ra: Reg, rb: Reg) -> &mut Self {
        self.emit_three_reg(201, rd, ra, rb)
    }

    // ===== Public raw emission =====

    /// Emit a raw byte with bitmask control.
    ///
    /// `is_instruction_start` selects the bitmask entry recorded for this
    /// byte (`1` when true, `0` otherwise).
    pub fn emit_raw(&mut self, byte: u8, is_instruction_start: bool) {
        self.emit_byte(byte, is_instruction_start);
    }

    // ===== Internal helpers =====

    /// Append one code byte together with its bitmask entry.
    fn emit_byte(&mut self, byte: u8, is_instruction_start: bool) {
        self.code.push(byte);
        self.bitmask.push(u8::from(is_instruction_start));
    }

    /// Append operand bytes, marking each as a non-start in the bitmask.
    fn emit_operands(&mut self, bytes: &[u8]) {
        self.code.extend_from_slice(bytes);
        let marked = self.bitmask.len() + bytes.len();
        self.bitmask.resize(marked, 0);
    }

    /// Append the low `size` bytes of `value` in little-endian order.
    ///
    /// `size` is expected to be at most 8. Larger values trip a debug
    /// assertion; in release builds they are clamped to 8 bytes.
    fn emit_imm(&mut self, value: i64, size: u8) {
        let bytes = value.to_le_bytes();
        debug_assert!(
            usize::from(size) <= bytes.len(),
            "immediate width must be at most 8 bytes"
        );
        let width = usize::from(size).min(bytes.len());
        self.emit_operands(&bytes[..width]);
    }

    /// Pack two registers into one operand byte: `lo` in bits 0-3, `hi` in
    /// bits 4-7.
    fn pack_regs(lo: Reg, hi: Reg) -> u8 {
        (lo as u8) | ((hi as u8) << 4)
    }

    /// Format "one register + one immediate": opcode, register byte, then a
    /// 4-byte little-endian immediate.
    fn emit_reg_imm(&mut self, opcode: u8, reg: Reg, imm: i64) -> &mut Self {
        self.emit_byte(opcode, true);
        self.emit_byte(reg as u8, false);
        self.emit_imm(imm, Self::IMM_BYTES);
        self
    }

    /// Format "one register + one immediate + one offset": opcode, register
    /// byte, 4-byte immediate, 4-byte offset.
    fn emit_reg_imm_offset(&mut self, opcode: u8, reg: Reg, imm: i32, target: u32) -> &mut Self {
        self.emit_byte(opcode, true);
        // Register in the low 4 bits; the bits above it carry lX, the byte
        // length of the immediate that precedes the offset (always 4 here).
        self.emit_byte((reg as u8) | (Self::IMM_BYTES << 4), false);
        self.emit_imm(i64::from(imm), Self::IMM_BYTES);
        self.emit_imm(i64::from(target), Self::IMM_BYTES);
        self
    }

    /// Format "two registers": opcode followed by one packed register byte.
    fn emit_two_reg(&mut self, opcode: u8, lo: Reg, hi: Reg) -> &mut Self {
        self.emit_byte(opcode, true);
        self.emit_byte(Self::pack_regs(lo, hi), false);
        self
    }

    /// Format "two registers + one immediate": opcode, `rd | ra << 4`, then a
    /// 4-byte little-endian immediate.
    fn emit_two_reg_imm(&mut self, opcode: u8, rd: Reg, ra: Reg, imm: i32) -> &mut Self {
        self.emit_two_reg(opcode, rd, ra);
        self.emit_imm(i64::from(imm), Self::IMM_BYTES);
        self
    }

    /// Format "three registers": opcode, `ra | rb << 4`, then `rd` in its own
    /// byte.
    fn emit_three_reg(&mut self, opcode: u8, rd: Reg, ra: Reg, rb: Reg) -> &mut Self {
        self.emit_two_reg(opcode, ra, rb);
        self.emit_byte(rd as u8, false);
        self
    }
}
336
#[cfg(test)]
mod tests {
    //! Encoding tests for the assembler: each test pins the complete byte
    //! sequence and bitmask of an instruction, so a wrong operand byte
    //! anywhere in the encoding fails the test.
    use super::*;

    #[test]
    fn test_trap_encoding() {
        let mut asm = Assembler::new();
        asm.trap();
        assert_eq!(asm.code, vec![0]); // opcode 0
        assert_eq!(asm.bitmask, vec![1]); // instruction start
    }

    #[test]
    fn test_fallthrough_encoding() {
        let mut asm = Assembler::new();
        asm.fallthrough();
        assert_eq!(asm.code, vec![1]);
        assert_eq!(asm.bitmask, vec![1]);
    }

    #[test]
    fn test_ecalli_encoding() {
        let mut asm = Assembler::new();
        asm.ecalli(0xFF);
        // opcode, then 0xFF as a 4-byte LE immediate
        assert_eq!(asm.code, vec![10, 0xFF, 0x00, 0x00, 0x00]);
        assert_eq!(asm.bitmask, vec![1, 0, 0, 0, 0]);
    }

    #[test]
    fn test_load_imm_64_encoding() {
        let mut asm = Assembler::new();
        asm.load_imm_64(Reg::A0, 0x0102030405060708);
        // opcode, register, then 8 bytes of LE immediate
        assert_eq!(
            asm.code,
            vec![20, Reg::A0 as u8, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01]
        );
        assert_eq!(asm.bitmask[0], 1);
        assert!(asm.bitmask[1..].iter().all(|&b| b == 0));
    }

    #[test]
    fn test_jump_encoding() {
        let mut asm = Assembler::new();
        asm.jump(42);
        // opcode, then the target as a 4-byte LE field
        assert_eq!(asm.code, vec![40, 42, 0, 0, 0]);
        assert_eq!(asm.bitmask, vec![1, 0, 0, 0, 0]);
    }

    #[test]
    fn test_load_imm_encoding() {
        let mut asm = Assembler::new();
        asm.load_imm(Reg::T0, -1);
        // -1 as i32 LE = 0xFF 0xFF 0xFF 0xFF
        assert_eq!(asm.code, vec![51, Reg::T0 as u8, 0xFF, 0xFF, 0xFF, 0xFF]);
    }

    #[test]
    fn test_branch_eq_imm_encoding() {
        let mut asm = Assembler::new();
        asm.branch_eq_imm(Reg::A0, 5, 0x0100);
        // reg byte: rd=A0(7) in the low nibble, immediate length 4 above it
        assert_eq!(
            asm.code,
            vec![81, (Reg::A0 as u8) | (4 << 4), 5, 0, 0, 0, 0x00, 0x01, 0, 0]
        );
        assert_eq!(asm.bitmask[0], 1);
        assert!(asm.bitmask[1..].iter().all(|&b| b == 0));
    }

    #[test]
    fn test_move_reg_encoding() {
        let mut asm = Assembler::new();
        asm.move_reg(Reg::A0, Reg::T0);
        // reg byte: rd=A0(7) in low nibble, ra=T0(2) in high nibble
        assert_eq!(asm.code, vec![100, (Reg::A0 as u8) | ((Reg::T0 as u8) << 4)]);
        assert_eq!(asm.bitmask, vec![1, 0]);
    }

    #[test]
    fn test_store_ind_u64_encoding() {
        let mut asm = Assembler::new();
        asm.store_ind_u64(Reg::T0, Reg::SP, -8);
        // reg byte: rd=T0(2) in low nibble, ra=SP(1) in high nibble; -8 as i32 LE
        assert_eq!(
            asm.code,
            vec![123, (Reg::T0 as u8) | ((Reg::SP as u8) << 4), 0xF8, 0xFF, 0xFF, 0xFF]
        );
    }

    #[test]
    fn test_add_64_encoding() {
        let mut asm = Assembler::new();
        asm.add_64(Reg::A0, Reg::T0, Reg::T1);
        // Three-reg: ra=T0(2) in low nibble, rb=T1(3) in high nibble, then rd
        assert_eq!(
            asm.code,
            vec![200, (Reg::T0 as u8) | ((Reg::T1 as u8) << 4), Reg::A0 as u8]
        );
        assert_eq!(asm.bitmask, vec![1, 0, 0]);
    }

    #[test]
    fn test_multiple_instructions_bitmask() {
        let mut asm = Assembler::new();
        asm.trap(); // 1 byte
        asm.fallthrough(); // 1 byte
        asm.load_imm(Reg::A0, 42); // 6 bytes
        // Instruction starts at offsets 0, 1, 2; everything after is operand bytes
        assert_eq!(asm.bitmask, vec![1, 1, 1, 0, 0, 0, 0, 0]);
        assert_eq!(asm.bitmask.len(), asm.code.len());
    }

    #[test]
    fn test_current_offset_tracks_position() {
        let mut asm = Assembler::new();
        assert_eq!(asm.current_offset(), 0);
        asm.trap();
        assert_eq!(asm.current_offset(), 1);
        asm.load_imm_64(Reg::A0, 0);
        assert_eq!(asm.current_offset(), 11); // 1 + 10
    }

    #[test]
    fn test_label_records_offset() {
        let mut asm = Assembler::new();
        asm.trap();
        asm.label("after_trap");
        assert_eq!(asm.labels["after_trap"], 1);
    }

    #[test]
    fn test_jump_table_entries() {
        let mut asm = Assembler::new();
        asm.trap();
        // First entry records the current offset, second the explicit one
        assert_eq!(asm.add_jump_entry(), 0);
        assert_eq!(asm.add_jump_entry_at(7), 1);
        assert_eq!(asm.jump_table, vec![1, 7]);
    }
}