1use alloc::vec;
25use alloc::vec::Vec;
26
27use super::asm::{Assembler, Cc, Label, Reg};
28use javm_exec::args::{self, Args};
29use javm_exec::gas_sim::GasSimulator;
30use javm_exec::instruction::Opcode;
31
/// Returns the distance from `pc + 1` to the next set bitmask entry.
///
/// Up to 25 positions after `pc` are examined; positions past the end of
/// `bitmask` count as set. When none of them is set the result is 24.
fn compute_skip(pc: usize, bitmask: &[u8]) -> usize {
    let first_after = pc + 1;
    (0..25usize)
        .find(|&distance| {
            bitmask
                .get(first_after + distance)
                .map_or(true, |&flag| flag == 1)
        })
        .unwrap_or(24)
}
/// Host register used for each of the 13 PVM registers, indexed by PVM register number.
///
/// Note that index 11 is `RAX` and index 12 is `RCX`; `RDX` and `R15` are absent
/// because they serve as `SCRATCH` and `GAS`.
const REG_MAP: [Reg; 13] = [
    Reg::RBP, // 0
    Reg::RBX, // 1
    Reg::R12, // 2
    Reg::R13, // 3
    Reg::R14, // 4
    Reg::RSI, // 5
    Reg::RDI, // 6
    Reg::R8,  // 7
    Reg::R9,  // 8
    Reg::R10, // 9
    Reg::R11, // 10
    Reg::RAX, // 11
    Reg::RCX, // 12
];
60
/// Temporary host register: holds the effective address for memory accesses and
/// receives the upper half of a fused multiply.
const SCRATCH: Reg = Reg::RDX;
/// Host register holding the gas counter; each gas block subtracts its cost from it
/// and branches to an out-of-gas stub when the result is negative.
const GAS: Reg = Reg::R15;
66
/// Registers pushed by `save_caller_saved` and popped (in reverse) by
/// `restore_caller_saved`.
// Only referenced by helpers that are themselves marked `dead_code`.
#[allow(dead_code)]
const CALLER_SAVED: [Reg; 8] = [
    Reg::RSI,
    Reg::RDI,
    Reg::R8,
    Reg::R9,
    Reg::R10,
    Reg::R11,
    Reg::RAX,
    Reg::RCX,
];
79
/// Base virtual address (`1 << 39`) to which `JitContext` field offsets are added
/// to form the `CTX_*` addresses.
pub const CTX_VA: u64 = 1u64 << 39;
92
93use super::JitContext;
94use memoffset::offset_of;
95
// Address of each `JitContext` field, computed as `CTX_VA` plus the field's offset
// within the struct. Generated code addresses the context through these constants.
/// Address of `JitContext::regs`.
pub const CTX_REGS: u64 = CTX_VA + offset_of!(JitContext, regs) as u64;
/// Address of `JitContext::gas`.
pub const CTX_GAS: u64 = CTX_VA + offset_of!(JitContext, gas) as u64;
/// Address of `JitContext::exit_reason`.
pub const CTX_EXIT_REASON: u64 = CTX_VA + offset_of!(JitContext, exit_reason) as u64;
/// Address of `JitContext::exit_arg`.
pub const CTX_EXIT_ARG: u64 = CTX_VA + offset_of!(JitContext, exit_arg) as u64;
/// Address of `JitContext::heap_base`.
pub const CTX_HEAP_BASE: u64 = CTX_VA + offset_of!(JitContext, heap_base) as u64;
/// Address of `JitContext::heap_top`.
pub const CTX_HEAP_TOP: u64 = CTX_VA + offset_of!(JitContext, heap_top) as u64;
/// Address of `JitContext::jt_ptr`.
pub const CTX_JT_PTR: u64 = CTX_VA + offset_of!(JitContext, jt_ptr) as u64;
/// Address of `JitContext::jt_len`.
pub const CTX_JT_LEN: u64 = CTX_VA + offset_of!(JitContext, jt_len) as u64;
/// Address of `JitContext::bb_starts`.
pub const CTX_BB_STARTS: u64 = CTX_VA + offset_of!(JitContext, bb_starts) as u64;
/// Address of `JitContext::bb_len`.
pub const CTX_BB_LEN: u64 = CTX_VA + offset_of!(JitContext, bb_len) as u64;
/// Address of `JitContext::entry_pc`.
pub const CTX_ENTRY_PC: u64 = CTX_VA + offset_of!(JitContext, entry_pc) as u64;
/// Address of `JitContext::pc`; written with the PVM pc before an exit is emitted.
pub const CTX_PC: u64 = CTX_VA + offset_of!(JitContext, pc) as u64;
/// Address of `JitContext::dispatch_table`.
pub const CTX_DISPATCH_TABLE: u64 = CTX_VA + offset_of!(JitContext, dispatch_table) as u64;
/// Address of `JitContext::code_base`.
pub const CTX_CODE_BASE: u64 = CTX_VA + offset_of!(JitContext, code_base) as u64;
/// Address of `JitContext::fast_reentry`.
pub const CTX_FAST_REENTRY: u64 = CTX_VA + offset_of!(JitContext, fast_reentry) as u64;
111
// Exit reason codes passed to `emit_exit`. The value 5 is not assigned here.
/// Exit code named for a normal halt.
pub const EXIT_HALT: u32 = 0;
/// Emitted for an undecodable opcode byte and for `Sbrk`.
pub const EXIT_PANIC: u32 = 1;
/// Exit code named for running out of gas.
pub const EXIT_OOG: u32 = 2;
/// Exit code named for a page fault.
pub const EXIT_PAGE_FAULT: u32 = 3;
/// Emitted for `Ecalli`; the exit argument carries the instruction's immediate.
pub const EXIT_HOST_CALL: u32 = 4;
/// Emitted for `Ecall`.
pub const EXIT_ECALL: u32 = 6;
/// Emitted for `Trap`.
pub const EXIT_TRAP: u32 = 7;
120
/// Output of [`Compiler::compile`].
pub struct CompileResult {
    /// Finalized machine code produced by the assembler.
    pub native_code: Vec<u8>,
    /// One entry per PVM pc (plus one): the native offset of the gas-block start
    /// bound at that pc, or 0 where no gas block starts.
    pub dispatch_table: Vec<i32>,
    /// `(native offset, PVM pc)` pairs recorded immediately before each emitted
    /// memory access instruction.
    pub trap_table: Vec<(u32, u32)>,
    /// Native offset of the exit label, or 0 if that label has no offset.
    pub exit_label_offset: u32,
}
128
/// Table of helper addresses, one per memory access width plus `sbrk`.
///
/// In the code visible here the values are only compared against a `fn_addr`
/// to recover the access width; presumably they are function addresses —
/// confirm against the caller that fills this struct.
#[repr(C)]
pub struct HelperFns {
    /// Helper associated with 1-byte reads.
    pub mem_read_u8: u64,
    /// Helper associated with 2-byte reads.
    pub mem_read_u16: u64,
    /// Helper associated with 4-byte reads.
    pub mem_read_u32: u64,
    /// Helper associated with 8-byte reads.
    pub mem_read_u64: u64,
    /// Helper associated with 1-byte writes.
    pub mem_write_u8: u64,
    /// Helper associated with 2-byte writes.
    pub mem_write_u16: u64,
    /// Helper associated with 4-byte writes.
    pub mem_write_u32: u64,
    /// Helper associated with 8-byte writes.
    pub mem_write_u64: u64,
    /// Helper associated with `sbrk`.
    pub sbrk_helper: u64,
}
142
/// What the compiler currently knows about how a PVM register's value was produced.
#[derive(Clone, Copy, Debug)]
enum RegDef {
    /// Nothing is known about the register.
    Unknown,
    /// The register was loaded with an immediate; only its low 32 bits are kept.
    Const(u32),
    /// The register equals register `src` shifted left by `shift`.
    Shifted { src: usize, shift: u8 },
    /// The register equals register `base` plus register `idx` shifted left by `shift`.
    ScaledAdd { base: usize, idx: usize, shift: u8 },
}
158
/// Single-pass translator from PVM bytecode to native code.
pub struct Compiler {
    /// Assembler that receives all emitted instructions and owns the labels.
    pub asm: Assembler,
    /// Index of the label that corresponds to PVM pc 0; the label for pc `p` is
    /// `label_base + p`.
    label_base: u32,
    /// PVM pcs at which a gas block start was emitted, in emission order.
    gas_block_pcs: Vec<u32>,
    /// Label of the common exit sequence.
    exit_label: Label,
    /// Label allocated for the out-of-gas path.
    oog_label: Label,
    /// Label allocated for the panic path.
    panic_label: Label,
    /// Label allocated for the out-of-gas path that also records a pc
    /// (presumably — its use is outside this view).
    oog_pc_label: Label,
    /// `(stub label, block start pc, block gas cost)` for every finished gas block.
    oog_stubs: Vec<(Label, u32, u32)>,
    /// Helper address table supplied by the caller.
    helpers: HelperFns,
    /// Raw pointer to the bitmask slice passed to `new`.
    /// NOTE(review): no lifetime ties this to the slice; the caller must keep the
    /// bitmask alive for as long as `is_basic_block_start` may be called.
    bitmask_ptr: *const u8,
    /// Length of the bitmask slice behind `bitmask_ptr`.
    bitmask_len: usize,
    /// Per-register knowledge used to fold address computations.
    reg_defs: [RegDef; 13],
    /// Bit `i` is set when `reg_defs[i]` is not `Unknown`.
    reg_defs_active: u16,
    /// Cleared after every instruction other than `Add64`/`SetLtU`; presumably
    /// tracks a preceding add for carry fusion — confirm where it is set.
    last_add_cf: Option<(usize, usize, usize)>,
    /// `(native offset, PVM pc)` for each emitted memory access instruction.
    trap_entries: Vec<(u32, u32)>,
    /// Memory-latency parameter forwarded to the gas cost functions.
    mem_cycles: u8,
}
197
198impl Compiler {
199 pub fn new(
200 bitmask: &[u8],
201 _jump_table: &[u32],
202 helpers: HelperFns,
203 code_len: usize,
204 jit_va_base: u64,
205 mem_cycles: u8,
206 ) -> Self {
207 let estimated_native = code_len * 3 + 8192;
210 let estimated_labels = code_len + 1024;
212 let mut asm = Assembler::with_capacity(estimated_native, estimated_labels);
215 asm.set_jit_va_base(jit_va_base);
219 let _reserved = asm.new_label(); let exit_label = asm.new_label();
222 let oog_label = asm.new_label();
223 let panic_label = asm.new_label();
224 let oog_pc_label = asm.new_label();
225 let label_base = asm.labels_len() as u32;
230 asm.bulk_create_labels(code_len + 1);
231 Self {
232 label_base,
233 gas_block_pcs: Vec::with_capacity(1024),
234 asm,
235 exit_label,
236 oog_label,
237 panic_label,
238 oog_pc_label,
239 oog_stubs: Vec::with_capacity(1024),
240 reg_defs: [RegDef::Unknown; 13],
241 reg_defs_active: 0,
242 last_add_cf: None,
243 helpers,
244 bitmask_ptr: bitmask.as_ptr(),
245 bitmask_len: bitmask.len(),
246 trap_entries: Vec::with_capacity(2048),
247 mem_cycles,
248 }
249 }
250
251 #[inline]
253 fn label_for_pc(&self, pc: u32) -> Label {
254 Label(self.label_base + pc)
255 }
256
257 fn is_basic_block_start(&self, idx: u32) -> bool {
258 let i = idx as usize;
259 i < self.bitmask_len && unsafe { *self.bitmask_ptr.add(i) } == 1
262 }
263
264 pub fn compile(mut self, code: &[u8], bitmask: &[u8]) -> CompileResult {
267 let code_len = code.len();
268
269 self.emit_prologue();
271
272 let mut gas_sim = GasSimulator::new();
276 let mut pending_gas: Option<(Label, u32, usize)> = None;
277 let mut next_is_gas_start = true;
280
281 let mut pc: usize = 0;
283 while pc < code.len() && (pc >= bitmask.len() || bitmask[pc] != 1) {
284 pc += 1;
285 }
286
287 let code_ptr = code.as_ptr();
288
289 while pc < code.len() {
290 self.asm.ensure_capacity(512);
291
292 let raw_byte = unsafe { *code_ptr.add(pc) };
294 let is_gas_start = next_is_gas_start;
295 next_is_gas_start = false;
296
297 if raw_byte == 1 || raw_byte == 2 {
300 let skip = javm_exec::gas_cost::skip_distance(bitmask, pc);
302 if is_gas_start {
303 self.emit_gas_block_start(pc, &mut pending_gas, &mut gas_sim);
304 }
305 gas_sim.feed(&javm_exec::gas_cost::FastCost {
306 cycles: 2,
307 decode_slots: 1,
308 exec_unit: 0,
309 src_mask: 0,
310 dst_mask: 0,
311 is_terminator: true,
312 is_move_reg: false,
313 });
314 next_is_gas_start = true; pc += 1 + skip;
316 continue;
317 }
318
319 let (opcode, category) = match javm_exec::instruction::decode_opcode_fast(raw_byte) {
321 Some(oc) => oc,
322 None => {
323 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
324 self.emit_exit(EXIT_PANIC, 0);
325 pc += 1;
326 continue;
327 }
328 };
329 let skip = javm_exec::gas_cost::skip_distance(bitmask, pc);
330 let next_pc = (pc + 1 + skip) as u32;
331
332 let reg_byte1 = if pc + 1 < code.len() {
336 unsafe { *code_ptr.add(pc + 1) }
337 } else {
338 0
339 };
340 let reg_byte2 = if pc + 2 < code.len() {
341 unsafe { *code_ptr.add(pc + 2) }
342 } else {
343 0
344 };
345 let raw_ra = reg_byte1 & 0x0F;
346 let raw_rb = reg_byte1 >> 4;
347
348 let decoded_args = match category {
349 javm_exec::instruction::InstructionCategory::ThreeReg => Args::ThreeReg {
350 ra: raw_ra.min(12) as usize,
351 rb: raw_rb.min(12) as usize,
352 rd: reg_byte2.min(12) as usize,
353 },
354 javm_exec::instruction::InstructionCategory::TwoReg => Args::TwoReg {
355 rd: raw_ra.min(12) as usize,
356 ra: raw_rb.min(12) as usize,
357 },
358 javm_exec::instruction::InstructionCategory::TwoRegOneImm => {
359 let ra = raw_ra.min(12) as usize;
360 let rb = raw_rb.min(12) as usize;
361 let lx = if skip > 1 { (skip - 1).min(4) } else { 0 };
362 let imm = args::read_signed_imm(code, pc + 2, lx);
363 Args::TwoRegImm { ra, rb, imm }
364 }
365 javm_exec::instruction::InstructionCategory::NoArgs => Args::None,
366 javm_exec::instruction::InstructionCategory::OneImm => {
367 let lx = skip.min(4);
368 Args::Imm {
369 imm: args::read_signed_imm(code, pc + 1, lx),
370 }
371 }
372 javm_exec::instruction::InstructionCategory::OneRegOneImm => {
373 let ra = raw_ra.min(12) as usize;
374 let lx = if skip > 1 { (skip - 1).min(4) } else { 0 };
375 Args::RegImm {
376 ra,
377 imm: args::read_signed_imm(code, pc + 2, lx),
378 }
379 }
380 javm_exec::instruction::InstructionCategory::OneRegExtImm => {
381 let ra = raw_ra.min(12) as usize;
382 Args::RegExtImm {
383 ra,
384 imm: args::read_le_imm(code, pc + 2, 8),
385 }
386 }
387 javm_exec::instruction::InstructionCategory::TwoImm => {
388 let lx = (reg_byte1 as usize % 8).min(4);
389 let ly = if skip > lx + 1 {
390 (skip - lx - 1).min(4)
391 } else {
392 0
393 };
394 Args::TwoImm {
395 imm_x: args::read_signed_imm(code, pc + 2, lx),
396 imm_y: args::read_signed_imm(code, pc + 2 + lx, ly),
397 }
398 }
399 javm_exec::instruction::InstructionCategory::OneOffset => {
400 let lx = skip.min(4);
401 let signed_off = args::read_signed_imm(code, pc + 1, lx) as i64;
402 Args::Offset {
403 offset: (pc as i64).wrapping_add(signed_off) as u64,
404 }
405 }
406 javm_exec::instruction::InstructionCategory::OneRegTwoImm => {
407 let ra = raw_ra.min(12) as usize;
408 let lx = ((reg_byte1 as usize / 16) % 8).min(4);
409 let ly = if skip > lx + 1 {
410 (skip - lx - 1).min(4)
411 } else {
412 0
413 };
414 Args::RegTwoImm {
415 ra,
416 imm_x: args::read_signed_imm(code, pc + 2, lx),
417 imm_y: args::read_signed_imm(code, pc + 2 + lx, ly),
418 }
419 }
420 javm_exec::instruction::InstructionCategory::OneRegImmOffset => {
421 let ra = raw_ra.min(12) as usize;
422 let lx = ((reg_byte1 as usize / 16) % 8).min(4);
423 let ly = if skip > lx + 1 {
424 (skip - lx - 1).min(4)
425 } else {
426 0
427 };
428 let imm = args::read_signed_imm(code, pc + 2, lx);
429 let signed_off = args::read_signed_imm(code, pc + 2 + lx, ly) as i64;
430 Args::RegImmOffset {
431 ra,
432 imm,
433 offset: (pc as i64).wrapping_add(signed_off) as u64,
434 }
435 }
436 javm_exec::instruction::InstructionCategory::TwoRegOneOffset => {
437 let ra = raw_ra.min(12) as usize;
438 let rb = raw_rb.min(12) as usize;
439 let lx = if skip > 1 { (skip - 1).min(4) } else { 0 };
440 let signed_off = args::read_signed_imm(code, pc + 2, lx) as i64;
441 Args::TwoRegOffset {
442 ra,
443 rb,
444 offset: (pc as i64).wrapping_add(signed_off) as u64,
445 }
446 }
447 javm_exec::instruction::InstructionCategory::TwoRegTwoImm => {
448 let ra = raw_ra.min(12) as usize;
449 let rb = raw_rb.min(12) as usize;
450 let lx = (reg_byte2 as usize % 8).min(4);
451 let ly = if skip > lx + 2 {
452 (skip - lx - 2).min(4)
453 } else {
454 0
455 };
456 Args::TwoRegTwoImm {
457 ra,
458 rb,
459 imm_x: args::read_signed_imm(code, pc + 3, lx),
460 imm_y: args::read_signed_imm(code, pc + 3 + lx, ly),
461 }
462 }
463 };
464
465 if is_gas_start {
467 self.emit_gas_block_start(pc, &mut pending_gas, &mut gas_sim);
468 }
469
470 let is_terminator = {
471 let (term, needs_full) = javm_exec::gas_cost::feed_gas_direct(
474 opcode as u8,
475 raw_ra,
476 raw_rb,
477 reg_byte2 & 0x0F,
478 &mut gas_sim,
479 self.mem_cycles,
480 );
481 if needs_full {
482 let fc = javm_exec::gas_cost::fast_cost_lut_regs(
484 opcode as u8,
485 &decoded_args,
486 pc,
487 code,
488 bitmask,
489 raw_ra,
490 raw_rb,
491 reg_byte2 & 0x0F,
492 self.mem_cycles,
493 );
494 gas_sim.feed(&fc);
495 fc.is_terminator
496 } else {
497 term
498 }
499 };
500
501 let fused = match opcode {
503 Opcode::Add64 => {
504 self.try_fuse_scaled_index_raw(code, bitmask, pc, &decoded_args, &mut gas_sim)
505 }
506 Opcode::Mul64 => {
507 self.try_fuse_mul_pair_raw(code, bitmask, pc, &decoded_args, &mut gas_sim)
508 }
509 _ => None,
510 };
511
512 if let Some(advance) = fused {
513 self.last_add_cf = None; pc += advance;
515 continue;
516 }
517
518 if !matches!(opcode, Opcode::Add64 | Opcode::SetLtU) {
521 self.last_add_cf = None;
522 }
523
524 self.compile_instruction(opcode, &decoded_args, pc as u32, next_pc);
525
526 match opcode {
533 Opcode::Add64
534 | Opcode::LoadImm
535 | Opcode::LoadImm64
536 | Opcode::ShloLImm64
537 | Opcode::MoveReg => {
538 self.update_reg_defs(opcode, &decoded_args);
539 }
540 _ => {
541 match category {
544 javm_exec::instruction::InstructionCategory::ThreeReg => {
545 if let Args::ThreeReg { rd, .. } = decoded_args {
546 self.invalidate_reg(rd);
547 }
548 }
549 javm_exec::instruction::InstructionCategory::TwoReg => {
550 if let Args::TwoReg { rd, .. } = decoded_args {
551 self.invalidate_reg(rd);
552 }
553 }
554 javm_exec::instruction::InstructionCategory::TwoRegOneImm
555 | javm_exec::instruction::InstructionCategory::OneRegOneImm
556 | javm_exec::instruction::InstructionCategory::OneRegExtImm
557 | javm_exec::instruction::InstructionCategory::OneRegTwoImm
558 | javm_exec::instruction::InstructionCategory::OneRegImmOffset => {
559 self.invalidate_reg(raw_ra.min(12) as usize);
561 }
562 _ => {
563 if is_terminator {
567 self.invalidate_all_regs();
568 }
569 }
570 }
571 }
572 }
573
574 if is_terminator {
576 next_is_gas_start = true;
577 }
578
579 pc += 1 + skip;
580 }
581
582 if let Some((stub_label, block_pc, patch_offset)) = pending_gas.take() {
584 let cost = gas_sim.flush_and_get_cost();
585 self.asm.patch_i32(patch_offset, cost as i32);
586 self.oog_stubs.push((stub_label, block_pc, cost));
587 }
588
589 self.emit_exit_sequences();
591
592 let table_len = code_len + 1;
595 let mut dispatch_table = vec![0i32; table_len];
596 for &pvm_pc in self.gas_block_pcs.iter() {
597 let label = Label(self.label_base + pvm_pc);
598 if let Some(offset) = self.asm.label_offset(label) {
599 dispatch_table[pvm_pc as usize] = offset as i32;
600 }
601 }
602 let exit_label_offset = self.asm.label_offset(self.exit_label).unwrap_or(0) as u32;
606 let trap_table = self.trap_entries;
607
608 CompileResult {
609 native_code: self.asm.finalize(),
610 dispatch_table,
611 trap_table,
612 exit_label_offset,
613 }
614 }
615
616 #[allow(dead_code)]
618 fn save_caller_saved(&mut self) {
619 for ® in &CALLER_SAVED {
620 self.asm.push(reg);
621 }
622 }
623
624 #[allow(dead_code)]
626 fn restore_caller_saved(&mut self) {
627 for ® in CALLER_SAVED.iter().rev() {
628 self.asm.pop(reg);
629 }
630 }
631
632 fn try_fuse_scaled_index_raw(
635 &mut self,
636 code: &[u8],
637 bitmask: &[u8],
638 pc: usize,
639 args: &Args,
640 gas_sim: &mut GasSimulator,
641 ) -> Option<usize> {
642 let Args::ThreeReg {
643 ra: a1_ra,
644 rb: a1_rb,
645 rd: a1_rd,
646 } = args
647 else {
648 return None;
649 };
650 if a1_ra != a1_rb {
651 return None;
652 }
653 let idx_reg = *a1_ra;
654 let d1 = *a1_rd;
655
656 let skip1 = compute_skip(pc, bitmask);
658 let pc2 = pc + 1 + skip1;
659 if pc2 >= code.len() || (pc2 < bitmask.len() && bitmask[pc2] != 1) {
660 return None;
661 }
662 let op2 = Opcode::from_byte(code[pc2])?;
663 if op2 != Opcode::Add64 {
664 return None;
665 }
666 let skip2 = compute_skip(pc2, bitmask);
667 let args2 = args::decode_args(code, pc2, skip2, op2.category());
668 let Args::ThreeReg {
669 ra: a2_ra,
670 rb: a2_rb,
671 rd: a2_rd,
672 } = args2
673 else {
674 return None;
675 };
676 if a2_ra != d1 || a2_rb != d1 || a2_rd != d1 {
677 return None;
678 }
679
680 let pc3 = pc2 + 1 + skip2;
682 if pc3 >= code.len() || (pc3 < bitmask.len() && bitmask[pc3] != 1) {
683 return None;
684 }
685 let op3 = Opcode::from_byte(code[pc3])?;
686 if op3 != Opcode::Add64 {
687 return None;
688 }
689 let skip3 = compute_skip(pc3, bitmask);
690 let args3 = args::decode_args(code, pc3, skip3, op3.category());
691 let Args::ThreeReg {
692 ra: a3_ra,
693 rb: a3_rb,
694 rd: a3_rd,
695 } = args3
696 else {
697 return None;
698 };
699 let base_reg;
700 if a3_rb == d1 && a3_ra != d1 {
701 base_reg = a3_ra;
702 } else if a3_ra == d1 && a3_rb != d1 {
703 base_reg = a3_rb;
704 } else {
705 return None;
706 }
707 let addr_reg = a3_rd;
708
709 let pc4 = pc3 + 1 + skip3;
711 if pc4 >= code.len() || (pc4 < bitmask.len() && bitmask[pc4] != 1) {
712 return None;
713 }
714 let op4 = Opcode::from_byte(code[pc4])?;
715 let skip4 = compute_skip(pc4, bitmask);
716 let args4 = args::decode_args(code, pc4, skip4, op4.category());
717
718 for &(opc, a, p) in &[(op2, &args2, pc2), (op3, &args3, pc3), (op4, &args4, pc4)] {
720 let fc = javm_exec::gas_cost::fast_cost_from_decoded(
721 opc as u8,
722 a,
723 p as u32,
724 code,
725 bitmask,
726 self.mem_cycles,
727 );
728 gas_sim.feed(&fc);
729 }
730
731 match op4 {
737 Opcode::LoadIndU8
738 | Opcode::LoadIndI8
739 | Opcode::LoadIndU16
740 | Opcode::LoadIndI16
741 | Opcode::LoadIndU32
742 | Opcode::LoadIndI32
743 | Opcode::LoadIndU64 => {
744 let Args::TwoRegImm { ra, rb, imm } = args4 else {
745 return None;
746 };
747 if rb != addr_reg || imm as i32 != 0 {
748 return None;
749 }
750 self.asm
751 .lea_sib_scaled_32(SCRATCH, REG_MAP[base_reg], REG_MAP[idx_reg], 2);
752 let fn_addr = self.read_fn_for(op4);
753 let ra_reg = REG_MAP[ra];
754 self.emit_mem_read(ra_reg, SCRATCH, fn_addr, pc4 as u32);
755 self.emit_sign_extend(op4, ra_reg);
756 self.invalidate_all_regs();
757 Some(pc4 + 1 + skip4 - pc)
758 }
759 Opcode::StoreIndU8
760 | Opcode::StoreIndU16
761 | Opcode::StoreIndU32
762 | Opcode::StoreIndU64 => {
763 let Args::TwoRegImm { ra, rb, imm } = args4 else {
764 return None;
765 };
766 if rb != addr_reg || imm as i32 != 0 {
767 return None;
768 }
769 self.asm
770 .lea_sib_scaled_32(SCRATCH, REG_MAP[base_reg], REG_MAP[idx_reg], 2);
771 let fn_addr = self.write_fn_for(op4);
772 let ra_reg = REG_MAP[ra];
773 self.emit_mem_write(true, ra_reg, fn_addr, pc4 as u32);
774 self.invalidate_all_regs();
775 Some(pc4 + 1 + skip4 - pc)
776 }
777 _ => None,
778 }
779 }
780
781 fn try_fuse_mul_pair_raw(
793 &mut self,
794 code: &[u8],
795 bitmask: &[u8],
796 pc: usize,
797 args: &Args,
798 gas_sim: &mut GasSimulator,
799 ) -> Option<usize> {
800 let Args::ThreeReg {
801 ra: m_ra,
802 rb: m_rb,
803 rd: m_rd,
804 } = args
805 else {
806 return None;
807 };
808
809 let skip1 = compute_skip(pc, bitmask);
810 let pc2 = pc + 1 + skip1;
811 if pc2 >= code.len() || (pc2 < bitmask.len() && bitmask[pc2] != 1) {
812 return None;
813 }
814 let op2 = Opcode::from_byte(code[pc2])?;
815 let signed = match op2 {
816 Opcode::MulUpperSS => true,
817 Opcode::MulUpperUU => false,
818 _ => return None,
819 };
820 let skip2 = compute_skip(pc2, bitmask);
821 let args2 = args::decode_args(code, pc2, skip2, op2.category());
822 let Args::ThreeReg {
823 ra: u_ra,
824 rb: u_rb,
825 rd: u_rd,
826 } = args2
827 else {
828 return None;
829 };
830 if u_ra != *m_ra || u_rb != *m_rb {
831 return None;
832 }
833 if *m_rd == u_rd {
835 return None;
836 }
837
838 let fc = javm_exec::gas_cost::fast_cost_from_decoded(
841 op2 as u8,
842 &args2,
843 pc2 as u32,
844 code,
845 bitmask,
846 self.mem_cycles,
847 );
848 gas_sim.feed(&fc);
849
850 let (a, b) = (REG_MAP[*m_ra], REG_MAP[*m_rb]);
851 let (rd_lo, rd_hi) = (REG_MAP[*m_rd], REG_MAP[u_rd]);
852 let phi11 = REG_MAP[11]; debug_assert_eq!(phi11, Reg::RAX);
854
855 let need_save_phi11 = rd_lo != phi11 && rd_hi != phi11;
875
876 if need_save_phi11 {
877 self.asm.push(phi11);
878 }
879
880 let mul_src = if b == phi11 {
884 if need_save_phi11 {
885 self.asm.mov_load64(SCRATCH, Reg::RSP, 0);
887 } else {
888 self.asm.mov_rr(SCRATCH, b);
891 }
892 SCRATCH
893 } else {
894 b
895 };
896
897 if a != phi11 {
899 self.asm.mov_rr(phi11, a);
900 }
901
902 if signed {
903 self.asm.imul_rdx_rax(mul_src);
904 } else {
905 self.asm.mul_rdx_rax(mul_src);
906 }
907
908 if rd_lo != phi11 {
911 self.asm.mov_rr(rd_lo, phi11);
912 }
913 self.asm.mov_rr(rd_hi, SCRATCH);
915
916 if need_save_phi11 {
917 self.asm.pop(phi11);
918 }
919
920 self.invalidate_all_regs();
921 Some(pc2 + 1 + skip2 - pc)
922 }
923
924 fn emit_mem_read(&mut self, dst: Reg, _addr_reg: Reg, fn_addr: u64, pvm_pc: u32) {
927 self.emit_mem_read_sized(dst, fn_addr, 0, pvm_pc);
928 }
929
930 fn emit_mem_read_sized(&mut self, dst: Reg, fn_addr: u64, width_bytes: u32, pvm_pc: u32) {
934 let w = if width_bytes > 0 {
935 width_bytes
936 } else if fn_addr == self.helpers.mem_read_u8 {
937 1
938 } else if fn_addr == self.helpers.mem_read_u16 {
939 2
940 } else if fn_addr == self.helpers.mem_read_u32 {
941 4
942 } else {
943 8
944 };
945
946 self.trap_entries.push((self.asm.offset() as u32, pvm_pc));
948
949 match w {
950 1 => self.asm.movzx_load8_at_index(dst, SCRATCH),
951 2 => self.asm.movzx_load16_at_index(dst, SCRATCH),
952 4 => self.asm.mov_load32_at_index(dst, SCRATCH),
953 8 => self.asm.mov_load64_at_index(dst, SCRATCH),
954 _ => unreachable!(),
955 }
956 }
957
958 fn emit_sign_extend(&mut self, opcode: Opcode, reg: Reg) {
961 match opcode {
962 Opcode::LoadI8 | Opcode::LoadIndI8 => self.asm.movsx_8_64(reg, reg),
963 Opcode::LoadI16 | Opcode::LoadIndI16 => self.asm.movsx_16_64(reg, reg),
964 Opcode::LoadI32 | Opcode::LoadIndI32 => self.asm.movsxd(reg, reg),
965 _ => {}
966 }
967 }
968
969 fn emit_mem_write(&mut self, _addr_in_scratch: bool, val_reg: Reg, fn_addr: u64, pvm_pc: u32) {
972 let w = if fn_addr == self.helpers.mem_write_u8 {
973 1u32
974 } else if fn_addr == self.helpers.mem_write_u16 {
975 2
976 } else if fn_addr == self.helpers.mem_write_u32 {
977 4
978 } else {
979 8
980 };
981
982 self.trap_entries.push((self.asm.offset() as u32, pvm_pc));
984
985 match w {
986 1 => self.asm.mov_store8_at_index(SCRATCH, val_reg),
987 2 => self.asm.mov_store16_at_index(SCRATCH, val_reg),
988 4 => self.asm.mov_store32_at_index(SCRATCH, val_reg),
989 8 => self.asm.mov_store64_at_index(SCRATCH, val_reg),
990 _ => unreachable!(),
991 }
992 }
993
994 fn emit_store_imm_ind(
998 &mut self,
999 opcode: Opcode,
1000 ra: usize,
1001 imm_x: i32,
1002 imm_y: u64,
1003 _pvm_pc: u32,
1004 ) {
1005 self.emit_addr_to_scratch(ra, imm_x);
1007
1008 let fits_i32 = {
1009 let imm_i64 = imm_y as i64;
1010 imm_i64 >= i32::MIN as i64 && imm_i64 <= i32::MAX as i64
1011 };
1012
1013 self.trap_entries.push((self.asm.offset() as u32, _pvm_pc));
1015
1016 match opcode {
1017 Opcode::StoreImmIndU8 => {
1018 self.asm.mov_store8_at_index_imm(SCRATCH, imm_y as u8);
1019 }
1020 Opcode::StoreImmIndU16 => {
1021 self.asm.mov_store16_at_index_imm(SCRATCH, imm_y as u16);
1022 }
1023 Opcode::StoreImmIndU32 => {
1024 self.asm.mov_store32_at_index_imm(SCRATCH, imm_y as i32);
1025 }
1026 Opcode::StoreImmIndU64 if fits_i32 => {
1027 self.asm.mov_store64_at_index_imm(SCRATCH, imm_y as i32);
1029 }
1030 Opcode::StoreImmIndU64 => {
1031 self.asm.push(Reg::RCX);
1033 self.asm.mov_ri64(Reg::RCX, imm_y);
1034 self.asm.mov_store64_at_index(SCRATCH, Reg::RCX);
1035 self.asm.pop(Reg::RCX);
1036 }
1037 _ => unreachable!(),
1038 }
1039 }
1040
1041 fn emit_addr_to_scratch(&mut self, rb: usize, imm: i32) {
1043 if let RegDef::Const(addr) = self.reg_defs[rb] {
1045 let effective = addr.wrapping_add(imm as u32);
1046 self.asm.mov_ri32(SCRATCH, effective);
1047 return;
1048 }
1049 if imm == 0
1051 && let RegDef::ScaledAdd { base, idx, shift } = self.reg_defs[rb]
1052 {
1053 self.asm
1054 .lea_sib_scaled_32(SCRATCH, REG_MAP[base], REG_MAP[idx], shift);
1055 return;
1056 }
1057 let rb_reg = REG_MAP[rb];
1058 if imm != 0 {
1059 self.asm.lea_32(SCRATCH, rb_reg, imm);
1062 } else {
1063 self.asm.movzx_32_64(SCRATCH, rb_reg);
1064 }
1065 }
1066
1067 #[inline]
1069 fn invalidate_dependents(&mut self, reg: usize) {
1070 let mut active = self.reg_defs_active & !(1u16 << reg);
1072 while active != 0 {
1073 let i = active.trailing_zeros() as usize;
1074 active &= active - 1;
1075 let depends = match self.reg_defs[i] {
1076 RegDef::Shifted { src, .. } => src == reg,
1077 RegDef::ScaledAdd { base, idx, .. } => base == reg || idx == reg,
1078 _ => false,
1079 };
1080 if depends {
1081 self.reg_defs[i] = RegDef::Unknown;
1082 self.reg_defs_active &= !(1u16 << i);
1083 }
1084 }
1085 }
1086
1087 #[inline]
1089 fn invalidate_reg(&mut self, reg: usize) {
1090 self.reg_defs[reg] = RegDef::Unknown;
1091 self.reg_defs_active &= !(1u16 << reg);
1092 self.invalidate_dependents(reg);
1093 }
1094
1095 #[inline]
1097 fn invalidate_all_regs(&mut self) {
1098 self.reg_defs = [RegDef::Unknown; 13];
1099 self.reg_defs_active = 0;
1100 }
1101
1102 fn emit_gas_block_start(
1109 &mut self,
1110 pc: usize,
1111 pending_gas: &mut Option<(Label, u32, usize)>,
1112 gas_sim: &mut GasSimulator,
1113 ) {
1114 let label = Label(self.label_base + pc as u32);
1115 self.asm.bind_label(label);
1116 self.gas_block_pcs.push(pc as u32);
1117 self.invalidate_all_regs();
1118 self.last_add_cf = None; if let Some((stub_label, block_pc, patch_offset)) = pending_gas.take() {
1121 let cost = gas_sim.flush_and_get_cost();
1122 self.asm.patch_i32(patch_offset, cost as i32);
1123 self.oog_stubs.push((stub_label, block_pc, cost));
1124 }
1125 gas_sim.reset();
1126
1127 let stub_label = self.asm.new_label();
1128 self.asm.sub_r64_imm32_patchable(GAS, 0);
1129 let patch_offset = self.asm.offset() - 4;
1130 self.asm.jcc_label(Cc::S, stub_label);
1131 *pending_gas = Some((stub_label, pc as u32, patch_offset));
1132 }
1133
1134 fn update_reg_defs(&mut self, opcode: Opcode, args: &Args) {
1138 match opcode {
1139 Opcode::Add64 => {
1140 if let Args::ThreeReg { ra, rb, rd } = args {
1141 if *ra == *rb && *ra == *rd {
1142 if let RegDef::Shifted { src, shift } = self.reg_defs[*rd] {
1144 if shift < 3 {
1145 self.reg_defs[*rd] = RegDef::Shifted {
1146 src,
1147 shift: shift + 1,
1148 };
1149 self.reg_defs_active |= 1u16 << *rd;
1150 } else {
1151 self.reg_defs[*rd] = RegDef::Unknown;
1152 self.reg_defs_active &= !(1u16 << *rd);
1153 }
1154 } else {
1155 self.reg_defs[*rd] = RegDef::Unknown;
1156 self.reg_defs_active &= !(1u16 << *rd);
1157 }
1158 } else if *ra == *rb {
1159 if *rd != *ra {
1165 self.reg_defs[*rd] = RegDef::Shifted { src: *ra, shift: 1 };
1166 self.reg_defs_active |= 1u16 << *rd;
1167 } else {
1168 self.reg_defs[*rd] = RegDef::Unknown;
1169 self.reg_defs_active &= !(1u16 << *rd);
1170 }
1171 } else {
1172 let def = if let RegDef::Shifted { src, shift } = self.reg_defs[*rb] {
1174 Some((*ra, src, shift))
1175 } else if let RegDef::Shifted { src, shift } = self.reg_defs[*ra] {
1176 Some((*rb, src, shift))
1177 } else {
1178 None
1179 };
1180 if let Some((base, idx, shift)) = def {
1181 self.reg_defs[*rd] = RegDef::ScaledAdd { base, idx, shift };
1182 self.reg_defs_active |= 1u16 << *rd;
1183 } else {
1184 self.reg_defs[*rd] = RegDef::Unknown;
1185 self.reg_defs_active &= !(1u16 << *rd);
1186 }
1187 }
1188 self.invalidate_dependents(*rd);
1189 }
1190 }
1191 Opcode::LoadImm => {
1192 if let Args::RegImm { ra, imm } = args {
1193 self.reg_defs[*ra] = RegDef::Const(*imm as u32);
1194 self.reg_defs_active |= 1u16 << *ra;
1195 self.invalidate_dependents(*ra);
1196 }
1197 }
1198 Opcode::LoadImm64 => {
1199 if let Args::RegExtImm { ra, imm } = args {
1200 self.reg_defs[*ra] = RegDef::Const(*imm as u32);
1201 self.reg_defs_active |= 1u16 << *ra;
1202 self.invalidate_dependents(*ra);
1203 }
1204 }
1205 Opcode::ShloLImm64 => {
1209 if let Args::TwoRegImm { ra, rb, imm } = args {
1210 let shift = (*imm as u32 % 64) as u8;
1211 if (1..=3).contains(&shift) && ra != rb {
1215 self.reg_defs[*ra] = RegDef::Shifted { src: *rb, shift };
1216 self.reg_defs_active |= 1u16 << *ra;
1217 } else {
1218 self.reg_defs[*ra] = RegDef::Unknown;
1219 self.reg_defs_active &= !(1u16 << *ra);
1220 }
1221 self.invalidate_dependents(*ra);
1222 }
1223 }
1224 Opcode::MoveReg => {
1225 if let Args::TwoReg { rd, ra } = args
1226 && *rd != *ra
1227 {
1228 self.reg_defs[*rd] = self.reg_defs[*ra];
1230 if matches!(self.reg_defs[*rd], RegDef::Unknown) {
1231 self.reg_defs_active &= !(1u16 << *rd);
1232 } else {
1233 self.reg_defs_active |= 1u16 << *rd;
1234 }
1235 self.invalidate_dependents(*rd);
1236 }
1237 }
1238 _ => {
1239 match args {
1240 Args::ThreeReg { rd, .. } => self.invalidate_reg(*rd),
1241 Args::TwoReg { rd, .. } => self.invalidate_reg(*rd),
1242 Args::TwoRegImm { ra, .. } => self.invalidate_reg(*ra),
1243 Args::RegImm { ra, .. } => self.invalidate_reg(*ra),
1244 Args::RegExtImm { ra, .. } => self.invalidate_reg(*ra),
1245 _ => {}
1246 }
1247 if opcode.is_terminator() {
1248 self.invalidate_all_regs();
1249 }
1250 }
1251 }
1252 }
1253
1254 #[inline(always)]
1257 fn compile_instruction(&mut self, opcode: Opcode, args: &Args, pc: u32, next_pc: u32) {
1258 match opcode {
1259 Opcode::Trap => {
1261 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
1262 self.emit_exit(EXIT_TRAP, 0);
1263 }
1264 Opcode::Fallthrough | Opcode::Unlikely => {
1265 }
1268
1269 Opcode::Ecall => {
1271 self.asm.mov_store32_rip_rel_imm(CTX_PC, next_pc as i32);
1272 self.emit_exit(EXIT_ECALL, 0);
1273 }
1274
1275 Opcode::Ecalli => {
1277 if let Args::Imm { imm } = args {
1278 let cap_slot = *imm as u32;
1279 self.asm.mov_store32_rip_rel_imm(CTX_PC, next_pc as i32);
1285 self.emit_exit(EXIT_HOST_CALL, cap_slot);
1286 }
1287 }
1288
1289 Opcode::LoadImm64 => {
1291 if let Args::RegExtImm { ra, imm } = args {
1292 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1293 }
1294 }
1295
1296 Opcode::StoreImmU8
1298 | Opcode::StoreImmU16
1299 | Opcode::StoreImmU32
1300 | Opcode::StoreImmU64 => {
1301 if let Args::TwoImm { imm_x, imm_y } = args {
1302 let addr = *imm_x as u32;
1305 self.asm.mov_ri32(SCRATCH, addr);
1306 let imm_val = *imm_y;
1307
1308 let fits_i32 = {
1309 let imm_i64 = imm_val as i64;
1310 imm_i64 >= i32::MIN as i64 && imm_i64 <= i32::MAX as i64
1311 };
1312
1313 self.trap_entries.push((self.asm.offset() as u32, pc));
1315 match opcode {
1316 Opcode::StoreImmU8 => {
1317 self.asm.mov_store8_at_index_imm(SCRATCH, imm_val as u8);
1318 }
1319 Opcode::StoreImmU16 => {
1320 self.asm.mov_store16_at_index_imm(SCRATCH, imm_val as u16);
1321 }
1322 Opcode::StoreImmU32 => {
1323 self.asm.mov_store32_at_index_imm(SCRATCH, imm_val as i32);
1324 }
1325 Opcode::StoreImmU64 if fits_i32 => {
1326 self.asm.mov_store64_at_index_imm(SCRATCH, imm_val as i32);
1327 }
1328 Opcode::StoreImmU64 => {
1329 self.asm.push(Reg::RCX);
1330 self.asm.mov_ri64(Reg::RCX, imm_val);
1331 self.asm.mov_store64_at_index(SCRATCH, Reg::RCX);
1332 self.asm.pop(Reg::RCX);
1333 }
1334 _ => unreachable!(),
1335 }
1336 }
1337 }
1338
1339 Opcode::Jump => {
1341 if let Args::Offset { offset } = args {
1342 self.emit_static_branch(*offset as u32, true, next_pc, pc);
1343 }
1344 }
1345
1346 Opcode::JumpInd => {
1348 if let Args::RegImm { ra, imm } = args {
1349 self.emit_dynamic_jump(*ra, *imm, pc);
1350 }
1351 }
1352 Opcode::LoadImm => {
1353 if let Args::RegImm { ra, imm } = args {
1354 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1355 }
1356 }
1357 Opcode::LoadU8
1358 | Opcode::LoadI8
1359 | Opcode::LoadU16
1360 | Opcode::LoadI16
1361 | Opcode::LoadU32
1362 | Opcode::LoadI32
1363 | Opcode::LoadU64 => {
1364 if let Args::RegImm { ra, imm } = args {
1365 let addr = *imm as u32;
1366 let fn_addr = self.read_fn_for(opcode);
1367 self.asm.mov_ri32(SCRATCH, addr);
1368 let ra_reg = REG_MAP[*ra];
1369 self.emit_mem_read(ra_reg, SCRATCH, fn_addr, pc);
1370 self.emit_sign_extend(opcode, ra_reg);
1371 }
1372 }
1373 Opcode::StoreU8 | Opcode::StoreU16 | Opcode::StoreU32 | Opcode::StoreU64 => {
1374 if let Args::RegImm { ra, imm } = args {
1375 let addr = *imm as u32;
1376 let ra_reg = REG_MAP[*ra];
1377 let fn_addr = self.write_fn_for(opcode);
1378 self.asm.mov_ri32(SCRATCH, addr);
1379 self.emit_mem_write(true, ra_reg, fn_addr, pc);
1380 }
1381 }
1382
1383 Opcode::StoreImmIndU8
1385 | Opcode::StoreImmIndU16
1386 | Opcode::StoreImmIndU32
1387 | Opcode::StoreImmIndU64 => {
1388 if let Args::RegTwoImm { ra, imm_x, imm_y } = args {
1389 self.emit_store_imm_ind(opcode, *ra, *imm_x as i32, *imm_y, pc);
1390 }
1391 }
1392
1393 Opcode::LoadImmJump => {
1395 if let Args::RegImmOffset { ra, imm, offset } = args {
1396 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1397 self.emit_static_branch(*offset as u32, true, next_pc, pc);
1398 }
1399 }
1400 Opcode::BranchEqImm => {
1401 if let Args::RegImmOffset { ra, imm, offset } = args {
1402 let ra_reg = REG_MAP[*ra];
1403 self.emit_branch_imm(ra_reg, *imm, Cc::E, *offset as u32, next_pc, pc);
1404 }
1405 }
1406 Opcode::BranchNeImm => {
1407 if let Args::RegImmOffset { ra, imm, offset } = args {
1408 let ra_reg = REG_MAP[*ra];
1409 self.emit_branch_imm(ra_reg, *imm, Cc::NE, *offset as u32, next_pc, pc);
1410 }
1411 }
1412 Opcode::BranchLtUImm => {
1413 if let Args::RegImmOffset { ra, imm, offset } = args {
1414 let ra_reg = REG_MAP[*ra];
1415 self.emit_branch_imm(ra_reg, *imm, Cc::B, *offset as u32, next_pc, pc);
1416 }
1417 }
1418 Opcode::BranchLeUImm => {
1419 if let Args::RegImmOffset { ra, imm, offset } = args {
1420 let ra_reg = REG_MAP[*ra];
1421 self.emit_branch_imm(ra_reg, *imm, Cc::BE, *offset as u32, next_pc, pc);
1422 }
1423 }
1424 Opcode::BranchGeUImm => {
1425 if let Args::RegImmOffset { ra, imm, offset } = args {
1426 let ra_reg = REG_MAP[*ra];
1427 self.emit_branch_imm(ra_reg, *imm, Cc::AE, *offset as u32, next_pc, pc);
1428 }
1429 }
1430 Opcode::BranchGtUImm => {
1431 if let Args::RegImmOffset { ra, imm, offset } = args {
1432 let ra_reg = REG_MAP[*ra];
1433 self.emit_branch_imm(ra_reg, *imm, Cc::A, *offset as u32, next_pc, pc);
1434 }
1435 }
1436 Opcode::BranchLtSImm => {
1437 if let Args::RegImmOffset { ra, imm, offset } = args {
1438 let ra_reg = REG_MAP[*ra];
1439 self.emit_branch_imm(ra_reg, *imm, Cc::L, *offset as u32, next_pc, pc);
1440 }
1441 }
1442 Opcode::BranchLeSImm => {
1443 if let Args::RegImmOffset { ra, imm, offset } = args {
1444 let ra_reg = REG_MAP[*ra];
1445 self.emit_branch_imm(ra_reg, *imm, Cc::LE, *offset as u32, next_pc, pc);
1446 }
1447 }
1448 Opcode::BranchGeSImm => {
1449 if let Args::RegImmOffset { ra, imm, offset } = args {
1450 let ra_reg = REG_MAP[*ra];
1451 self.emit_branch_imm(ra_reg, *imm, Cc::GE, *offset as u32, next_pc, pc);
1452 }
1453 }
1454 Opcode::BranchGtSImm => {
1455 if let Args::RegImmOffset { ra, imm, offset } = args {
1456 let ra_reg = REG_MAP[*ra];
1457 self.emit_branch_imm(ra_reg, *imm, Cc::G, *offset as u32, next_pc, pc);
1458 }
1459 }
1460
1461 Opcode::MoveReg => {
1463 if let Args::TwoReg { rd, ra } = args {
1464 let ra_reg = REG_MAP[*ra];
1465 self.asm.mov_rr(REG_MAP[*rd], ra_reg);
1466 }
1467 }
1468 Opcode::Sbrk => {
1469 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
1471 self.emit_exit(EXIT_PANIC, 0);
1472 }
1473 Opcode::CountSetBits64 => {
1474 if let Args::TwoReg { rd, ra } = args {
1475 let ra_reg = REG_MAP[*ra];
1476 self.asm.popcnt64(REG_MAP[*rd], ra_reg);
1477 }
1478 }
1479 Opcode::CountSetBits32 => {
1480 if let Args::TwoReg { rd, ra } = args {
1481 let ra_reg = REG_MAP[*ra];
1482 self.asm.popcnt32(REG_MAP[*rd], ra_reg);
1484 }
1485 }
1486 Opcode::LeadingZeroBits64 => {
1487 if let Args::TwoReg { rd, ra } = args {
1488 let ra_reg = REG_MAP[*ra];
1489 self.asm.lzcnt64(REG_MAP[*rd], ra_reg);
1490 }
1491 }
1492 Opcode::LeadingZeroBits32 => {
1493 if let Args::TwoReg { rd, ra } = args {
1494 let ra_reg = REG_MAP[*ra];
1495 self.asm.lzcnt32(REG_MAP[*rd], ra_reg);
1497 }
1498 }
1499 Opcode::TrailingZeroBits64 => {
1500 if let Args::TwoReg { rd, ra } = args {
1501 let ra_reg = REG_MAP[*ra];
1502 self.asm.tzcnt64(REG_MAP[*rd], ra_reg);
1503 }
1504 }
1505 Opcode::TrailingZeroBits32 => {
1506 if let Args::TwoReg { rd, ra } = args {
1507 let ra_reg = REG_MAP[*ra];
1508 self.asm.tzcnt32(REG_MAP[*rd], ra_reg);
1510 }
1511 }
1512 Opcode::SignExtend8 => {
1513 if let Args::TwoReg { rd, ra } = args {
1514 let ra_reg = REG_MAP[*ra];
1515 self.asm.movsx_8_64(REG_MAP[*rd], ra_reg);
1516 }
1517 }
1518 Opcode::SignExtend16 => {
1519 if let Args::TwoReg { rd, ra } = args {
1520 let ra_reg = REG_MAP[*ra];
1521 self.asm.movsx_16_64(REG_MAP[*rd], ra_reg);
1522 }
1523 }
1524 Opcode::ZeroExtend16 => {
1525 if let Args::TwoReg { rd, ra } = args {
1526 let ra_reg = REG_MAP[*ra];
1527 self.asm.movzx_16_64(REG_MAP[*rd], ra_reg);
1528 }
1529 }
1530 Opcode::ReverseBytes => {
1531 if let Args::TwoReg { rd, ra } = args {
1532 let ra_reg = REG_MAP[*ra];
1533 if *rd != *ra {
1534 self.asm.mov_rr(REG_MAP[*rd], ra_reg);
1535 }
1536 self.asm.bswap64(REG_MAP[*rd]);
1537 }
1538 }
1539
1540 Opcode::StoreIndU8
1542 | Opcode::StoreIndU16
1543 | Opcode::StoreIndU32
1544 | Opcode::StoreIndU64 => {
1545 if let Args::TwoRegImm { ra, rb, imm } = args {
1546 let ra_reg = REG_MAP[*ra];
1547 self.emit_addr_to_scratch(*rb, *imm as i32);
1548 let fn_addr = self.write_fn_for(opcode);
1549 self.emit_mem_write(true, ra_reg, fn_addr, pc);
1550 }
1551 }
1552 Opcode::LoadIndU8
1553 | Opcode::LoadIndI8
1554 | Opcode::LoadIndU16
1555 | Opcode::LoadIndI16
1556 | Opcode::LoadIndU32
1557 | Opcode::LoadIndI32
1558 | Opcode::LoadIndU64 => {
1559 if let Args::TwoRegImm { ra, rb, imm } = args {
1560 let ra_reg = REG_MAP[*ra];
1561 self.emit_addr_to_scratch(*rb, *imm as i32);
1562 let fn_addr = self.read_fn_for(opcode);
1563 self.emit_mem_read(ra_reg, SCRATCH, fn_addr, pc);
1564 self.emit_sign_extend(opcode, ra_reg);
1565 }
1566 }
1567 Opcode::AddImm32 => {
1568 if let Args::TwoRegImm { ra, rb, imm } = args {
1569 let rb_reg = REG_MAP[*rb];
1570 if *ra != *rb {
1571 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1572 }
1573 self.asm.add_ri32(REG_MAP[*ra], *imm as i32);
1574 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1575 }
1576 }
1577 Opcode::AddImm64 => {
1578 if let Args::TwoRegImm { ra, rb, imm } = args {
1579 let rb_reg = REG_MAP[*rb];
1580 if *ra != *rb {
1581 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1582 }
1583 if *imm as i32 == 1 {
1584 self.asm.inc64(REG_MAP[*ra]);
1585 } else if *imm as i32 == -1 {
1586 self.asm.dec64(REG_MAP[*ra]);
1587 } else {
1588 self.asm.add_ri(REG_MAP[*ra], *imm as i32);
1589 }
1590 }
1591 }
1592 Opcode::AndImm => {
1593 if let Args::TwoRegImm { ra, rb, imm } = args {
1594 let rb_reg = REG_MAP[*rb];
1595 if *ra != *rb {
1596 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1597 }
1598 self.asm.and_ri(REG_MAP[*ra], *imm as i32);
1599 }
1600 }
1601 Opcode::XorImm => {
1602 if let Args::TwoRegImm { ra, rb, imm } = args {
1603 let rb_reg = REG_MAP[*rb];
1604 if *ra != *rb {
1605 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1606 }
1607 self.asm.xor_ri(REG_MAP[*ra], *imm as i32);
1608 }
1609 }
1610 Opcode::OrImm => {
1611 if let Args::TwoRegImm { ra, rb, imm } = args {
1612 let rb_reg = REG_MAP[*rb];
1613 if *ra != *rb {
1614 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1615 }
1616 self.asm.or_ri(REG_MAP[*ra], *imm as i32);
1617 }
1618 }
1619 Opcode::MulImm32 => {
1620 if let Args::TwoRegImm { ra, rb, imm } = args {
1621 let rb_reg = REG_MAP[*rb];
1622 self.asm.imul_rri32(REG_MAP[*ra], rb_reg, *imm as i32);
1623 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1624 }
1625 }
1626 Opcode::MulImm64 => {
1627 if let Args::TwoRegImm { ra, rb, imm } = args {
1628 let rb_reg = REG_MAP[*rb];
1629 self.asm.imul_rri(REG_MAP[*ra], rb_reg, *imm as i32);
1630 }
1631 }
1632 Opcode::SetLtUImm => {
1633 if let Args::TwoRegImm { ra, rb, imm } = args {
1634 self.emit_setcc_imm(*ra, *rb, *imm, Cc::B);
1635 }
1636 }
1637 Opcode::SetLtSImm => {
1638 if let Args::TwoRegImm { ra, rb, imm } = args {
1639 self.emit_setcc_imm(*ra, *rb, *imm, Cc::L);
1640 }
1641 }
1642 Opcode::SetGtUImm => {
1643 if let Args::TwoRegImm { ra, rb, imm } = args {
1644 self.emit_setcc_imm(*ra, *rb, *imm, Cc::A);
1645 }
1646 }
1647 Opcode::SetGtSImm => {
1648 if let Args::TwoRegImm { ra, rb, imm } = args {
1649 self.emit_setcc_imm(*ra, *rb, *imm, Cc::G);
1650 }
1651 }
1652 Opcode::ShloLImm32 => {
1653 if let Args::TwoRegImm { ra, rb, imm } = args {
1654 let rb_reg = REG_MAP[*rb];
1655 if *ra != *rb {
1656 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1657 }
1658 self.asm.shl_ri32(REG_MAP[*ra], (*imm as u8) & 31);
1659 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1660 }
1661 }
1662 Opcode::ShloRImm32 => {
1663 if let Args::TwoRegImm { ra, rb, imm } = args {
1664 let rb_reg = REG_MAP[*rb];
1665 if *ra != *rb {
1666 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1667 }
1668 self.asm.movzx_32_64(REG_MAP[*ra], REG_MAP[*ra]);
1669 self.asm.shr_ri32(REG_MAP[*ra], (*imm as u8) & 31);
1670 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1671 }
1672 }
1673 Opcode::SharRImm32 => {
1674 if let Args::TwoRegImm { ra, rb, imm } = args {
1675 let rb_reg = REG_MAP[*rb];
1676 if *ra != *rb {
1677 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1678 }
1679 self.asm.sar_ri32(REG_MAP[*ra], (*imm as u8) & 31);
1680 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1681 }
1682 }
1683 Opcode::ShloLImm64 => {
1684 if let Args::TwoRegImm { ra, rb, imm } = args {
1685 let rb_reg = REG_MAP[*rb];
1686 if *ra != *rb {
1687 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1688 }
1689 self.asm.shl_ri64(REG_MAP[*ra], (*imm as u8) & 63);
1690 }
1691 }
1692 Opcode::ShloRImm64 => {
1693 if let Args::TwoRegImm { ra, rb, imm } = args {
1694 let rb_reg = REG_MAP[*rb];
1695 if *ra != *rb {
1696 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1697 }
1698 self.asm.shr_ri64(REG_MAP[*ra], (*imm as u8) & 63);
1699 }
1700 }
1701 Opcode::SharRImm64 => {
1702 if let Args::TwoRegImm { ra, rb, imm } = args {
1703 let rb_reg = REG_MAP[*rb];
1704 if *ra != *rb {
1705 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1706 }
1707 self.asm.sar_ri64(REG_MAP[*ra], (*imm as u8) & 63);
1708 }
1709 }
1710 Opcode::NegAddImm32 => {
1711 if let Args::TwoRegImm { ra, rb, imm } = args {
1712 let rb_reg = REG_MAP[*rb];
1713 if *ra == *rb {
1715 self.asm.mov_rr(SCRATCH, rb_reg);
1716 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1717 self.asm.sub_rr32(REG_MAP[*ra], SCRATCH);
1718 } else {
1719 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1720 self.asm.sub_rr32(REG_MAP[*ra], rb_reg);
1721 }
1722 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1723 }
1724 }
1725 Opcode::NegAddImm64 => {
1726 if let Args::TwoRegImm { ra, rb, imm } = args {
1727 let rb_reg = REG_MAP[*rb];
1728 if *ra == *rb {
1729 self.asm.mov_rr(SCRATCH, rb_reg);
1730 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1731 self.asm.sub_rr(REG_MAP[*ra], SCRATCH);
1732 } else {
1733 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1734 self.asm.sub_rr(REG_MAP[*ra], rb_reg);
1735 }
1736 }
1737 }
1738 Opcode::ShloLImmAlt32 => {
1740 if let Args::TwoRegImm { ra, rb, imm } = args {
1741 let rb_reg = REG_MAP[*rb];
1743 let shift_src = if *ra == *rb {
1744 self.asm.mov_rr(SCRATCH, rb_reg);
1745 SCRATCH
1746 } else {
1747 rb_reg
1748 };
1749 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1750 self.emit_shift_by_reg32(REG_MAP[*ra], shift_src, 4); self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1752 }
1753 }
1754 Opcode::ShloRImmAlt32 => {
1755 if let Args::TwoRegImm { ra, rb, imm } = args {
1756 let rb_reg = REG_MAP[*rb];
1757 let shift_src = if *ra == *rb {
1758 self.asm.mov_rr(SCRATCH, rb_reg);
1759 SCRATCH
1760 } else {
1761 rb_reg
1762 };
1763 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1764 self.asm.movzx_32_64(REG_MAP[*ra], REG_MAP[*ra]);
1765 self.emit_shift_by_reg32(REG_MAP[*ra], shift_src, 5); self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1767 }
1768 }
1769 Opcode::SharRImmAlt32 => {
1770 if let Args::TwoRegImm { ra, rb, imm } = args {
1771 let rb_reg = REG_MAP[*rb];
1772 let shift_src = if *ra == *rb {
1773 self.asm.mov_rr(SCRATCH, rb_reg);
1774 SCRATCH
1775 } else {
1776 rb_reg
1777 };
1778 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1779 self.emit_shift_by_reg32(REG_MAP[*ra], shift_src, 7); self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1781 }
1782 }
1783 Opcode::ShloLImmAlt64 => {
1784 if let Args::TwoRegImm { ra, rb, imm } = args {
1785 let rb_reg = REG_MAP[*rb];
1786 let shift_src = if *ra == *rb {
1787 self.asm.mov_rr(SCRATCH, rb_reg);
1788 SCRATCH
1789 } else {
1790 rb_reg
1791 };
1792 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1793 self.emit_shift_by_reg64(REG_MAP[*ra], shift_src, 4);
1794 }
1795 }
1796 Opcode::ShloRImmAlt64 => {
1797 if let Args::TwoRegImm { ra, rb, imm } = args {
1798 let rb_reg = REG_MAP[*rb];
1799 let shift_src = if *ra == *rb {
1800 self.asm.mov_rr(SCRATCH, rb_reg);
1801 SCRATCH
1802 } else {
1803 rb_reg
1804 };
1805 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1806 self.emit_shift_by_reg64(REG_MAP[*ra], shift_src, 5);
1807 }
1808 }
1809 Opcode::SharRImmAlt64 => {
1810 if let Args::TwoRegImm { ra, rb, imm } = args {
1811 let rb_reg = REG_MAP[*rb];
1812 let shift_src = if *ra == *rb {
1813 self.asm.mov_rr(SCRATCH, rb_reg);
1814 SCRATCH
1815 } else {
1816 rb_reg
1817 };
1818 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1819 self.emit_shift_by_reg64(REG_MAP[*ra], shift_src, 7);
1820 }
1821 }
1822 Opcode::CmovIzImm => {
1823 if let Args::TwoRegImm { ra, rb, imm } = args {
1824 let rb_reg = REG_MAP[*rb];
1829 let ra_reg = REG_MAP[*ra];
1830 self.asm.mov_ri64(SCRATCH, *imm);
1831 self.asm.test_rr(rb_reg, rb_reg);
1832 self.asm.cmovcc(Cc::E, ra_reg, SCRATCH);
1833 }
1834 }
1835 Opcode::CmovNzImm => {
1836 if let Args::TwoRegImm { ra, rb, imm } = args {
1837 let rb_reg = REG_MAP[*rb];
1838 let ra_reg = REG_MAP[*ra];
1839 self.asm.mov_ri64(SCRATCH, *imm);
1840 self.asm.test_rr(rb_reg, rb_reg);
1841 self.asm.cmovcc(Cc::NE, ra_reg, SCRATCH);
1842 }
1843 }
1844 Opcode::RotR64Imm => {
1845 if let Args::TwoRegImm { ra, rb, imm } = args {
1846 let rb_reg = REG_MAP[*rb];
1847 if *ra != *rb {
1848 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1849 }
1850 self.asm.ror_ri64(REG_MAP[*ra], (*imm as u8) & 63);
1851 }
1852 }
1853 Opcode::RotR64ImmAlt => {
1854 if let Args::TwoRegImm { ra, rb, imm } = args {
1855 let rb_reg = REG_MAP[*rb];
1857 let shift_src = if *ra == *rb {
1858 self.asm.mov_rr(SCRATCH, rb_reg);
1859 SCRATCH
1860 } else {
1861 rb_reg
1862 };
1863 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1864 self.emit_shift_by_reg64(REG_MAP[*ra], shift_src, 1); }
1866 }
1867 Opcode::RotR32Imm => {
1868 if let Args::TwoRegImm { ra, rb, imm } = args {
1869 let rb_reg = REG_MAP[*rb];
1870 if *ra != *rb {
1871 self.asm.mov_rr(REG_MAP[*ra], rb_reg);
1872 }
1873 self.asm.movzx_32_64(REG_MAP[*ra], REG_MAP[*ra]);
1874 self.asm.ror_ri32(REG_MAP[*ra], (*imm as u8) & 31);
1875 self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1876 }
1877 }
1878 Opcode::RotR32ImmAlt => {
1879 if let Args::TwoRegImm { ra, rb, imm } = args {
1880 let rb_reg = REG_MAP[*rb];
1881 let shift_src = if *ra == *rb {
1882 self.asm.mov_rr(SCRATCH, rb_reg);
1883 SCRATCH
1884 } else {
1885 rb_reg
1886 };
1887 self.asm.mov_ri64(REG_MAP[*ra], *imm);
1888 self.asm.movzx_32_64(REG_MAP[*ra], REG_MAP[*ra]);
1889 self.emit_shift_by_reg32(REG_MAP[*ra], shift_src, 1); self.asm.movsxd(REG_MAP[*ra], REG_MAP[*ra]);
1891 }
1892 }
1893
1894 Opcode::BranchEq => {
1896 if let Args::TwoRegOffset { ra, rb, offset } = args {
1897 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1900 self.emit_branch_reg(ra_reg, rb_reg, Cc::E, *offset as u32, next_pc, pc);
1901 }
1902 }
1903 Opcode::BranchNe => {
1904 if let Args::TwoRegOffset { ra, rb, offset } = args {
1905 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1906 self.emit_branch_reg(ra_reg, rb_reg, Cc::NE, *offset as u32, next_pc, pc);
1907 }
1908 }
1909 Opcode::BranchLtU => {
1910 if let Args::TwoRegOffset { ra, rb, offset } = args {
1911 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1912 self.emit_branch_reg(ra_reg, rb_reg, Cc::B, *offset as u32, next_pc, pc);
1913 }
1914 }
1915 Opcode::BranchLtS => {
1916 if let Args::TwoRegOffset { ra, rb, offset } = args {
1917 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1918 self.emit_branch_reg(ra_reg, rb_reg, Cc::L, *offset as u32, next_pc, pc);
1919 }
1920 }
1921 Opcode::BranchGeU => {
1922 if let Args::TwoRegOffset { ra, rb, offset } = args {
1923 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1924 self.emit_branch_reg(ra_reg, rb_reg, Cc::AE, *offset as u32, next_pc, pc);
1925 }
1926 }
1927 Opcode::BranchGeS => {
1928 if let Args::TwoRegOffset { ra, rb, offset } = args {
1929 let (ra_reg, rb_reg) = (REG_MAP[*ra], REG_MAP[*rb]);
1930 self.emit_branch_reg(ra_reg, rb_reg, Cc::GE, *offset as u32, next_pc, pc);
1931 }
1932 }
1933
1934 Opcode::LoadImmJumpInd => {
1936 if let Args::TwoRegTwoImm {
1937 ra,
1938 rb,
1939 imm_x,
1940 imm_y,
1941 } = args
1942 {
1943 self.asm.mov_ri64(REG_MAP[*ra], *imm_x);
1948 self.emit_dynamic_jump(*rb, *imm_y, pc);
1949 }
1950 }
1951
1952 Opcode::Add32 => {
1954 self.emit_alu3_32(args, |a, d, s| {
1955 a.add_rr32(d, s);
1956 });
1957 }
1958 Opcode::Sub32 => {
1959 self.emit_alu3_32_sub(args);
1960 }
1961 Opcode::Mul32 => {
1962 if let Args::ThreeReg { ra, rb, rd } = args {
1963 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
1964 if *rd == *rb && *rd != *ra {
1965 self.asm.mov_rr(SCRATCH, b);
1966 self.asm.mov_rr(d, a);
1967 self.asm.imul_rr32(d, SCRATCH);
1968 } else {
1969 if *rd != *ra {
1970 self.asm.mov_rr(d, a);
1971 }
1972 self.asm.imul_rr32(d, b);
1973 }
1974 self.asm.movsxd(d, d);
1975 }
1976 }
1977 Opcode::Add64 => {
1978 self.emit_alu3_64_comm(args, true, |a, d, s| {
1979 a.add_rr(d, s);
1980 });
1981 if let Args::ThreeReg { ra, rb, rd } = args {
1984 self.last_add_cf = Some((*rd, *ra, *rb));
1985 }
1986 }
1988 Opcode::Sub64 => {
1989 if let Args::ThreeReg { ra, rb, rd } = args {
1990 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
1991 if *rd == *rb && *rd != *ra {
1992 self.asm.neg64(d);
1994 self.asm.add_rr(d, a);
1995 } else {
1996 if *rd != *ra {
1997 self.asm.mov_rr(d, a);
1998 }
1999 self.asm.sub_rr(d, b);
2000 }
2001 }
2002 }
2003 Opcode::Mul64 => {
2004 self.emit_alu3_64_comm(args, true, |a, d, s| {
2005 a.imul_rr(d, s);
2006 });
2007 }
2008 Opcode::And => {
2009 self.emit_alu3_64_comm(args, true, |a, d, s| {
2010 a.and_rr(d, s);
2011 });
2012 }
2013 Opcode::Or => {
2014 self.emit_alu3_64_comm(args, true, |a, d, s| {
2015 a.or_rr(d, s);
2016 });
2017 }
2018 Opcode::Xor => {
2019 self.emit_alu3_64_comm(args, true, |a, d, s| {
2020 a.xor_rr(d, s);
2021 });
2022 }
2023
2024 Opcode::DivU32 => {
2026 self.emit_div(args, false, false, true);
2027 }
2028 Opcode::DivS32 => {
2029 self.emit_div(args, true, false, true);
2030 }
2031 Opcode::RemU32 => {
2032 self.emit_div(args, false, true, true);
2033 }
2034 Opcode::RemS32 => {
2035 self.emit_div(args, true, true, true);
2036 }
2037 Opcode::DivU64 => {
2038 self.emit_div(args, false, false, false);
2039 }
2040 Opcode::DivS64 => {
2041 self.emit_div(args, true, false, false);
2042 }
2043 Opcode::RemU64 => {
2044 self.emit_div(args, false, true, false);
2045 }
2046 Opcode::RemS64 => {
2047 self.emit_div(args, true, true, false);
2048 }
2049
2050 Opcode::ShloL32 => {
2053 if let Args::ThreeReg { ra, rb, rd } = args {
2054 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2055 let shift_src = if *rd == *rb && *rd != *ra {
2056 self.asm.mov_rr(SCRATCH, b);
2057 SCRATCH
2058 } else {
2059 b
2060 };
2061 if *rd != *ra {
2062 self.asm.mov_rr(d, a);
2063 }
2064 self.emit_shift_by_reg32(d, shift_src, 4);
2065 self.asm.movsxd(d, d);
2066 }
2067 }
2068 Opcode::ShloR32 => {
2069 if let Args::ThreeReg { ra, rb, rd } = args {
2070 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2071 let shift_src = if *rd == *rb && *rd != *ra {
2072 self.asm.mov_rr(SCRATCH, b);
2073 SCRATCH
2074 } else {
2075 b
2076 };
2077 if *rd != *ra {
2078 self.asm.mov_rr(d, a);
2079 }
2080 self.asm.movzx_32_64(d, d);
2081 self.emit_shift_by_reg32(d, shift_src, 5);
2082 self.asm.movsxd(d, d);
2083 }
2084 }
2085 Opcode::SharR32 => {
2086 if let Args::ThreeReg { ra, rb, rd } = args {
2087 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2088 let shift_src = if *rd == *rb && *rd != *ra {
2089 self.asm.mov_rr(SCRATCH, b);
2090 SCRATCH
2091 } else {
2092 b
2093 };
2094 if *rd != *ra {
2095 self.asm.mov_rr(d, a);
2096 }
2097 self.emit_shift_by_reg32(d, shift_src, 7);
2098 self.asm.movsxd(d, d);
2099 }
2100 }
2101 Opcode::ShloL64 => {
2102 if let Args::ThreeReg { ra, rb, rd } = args {
2103 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2104 let shift_src = if *rd == *rb && *rd != *ra {
2105 self.asm.mov_rr(SCRATCH, b);
2106 SCRATCH
2107 } else {
2108 b
2109 };
2110 if *rd != *ra {
2111 self.asm.mov_rr(d, a);
2112 }
2113 self.emit_shift_by_reg64(d, shift_src, 4);
2114 }
2115 }
2116 Opcode::ShloR64 => {
2117 if let Args::ThreeReg { ra, rb, rd } = args {
2118 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2119 let shift_src = if *rd == *rb && *rd != *ra {
2120 self.asm.mov_rr(SCRATCH, b);
2121 SCRATCH
2122 } else {
2123 b
2124 };
2125 if *rd != *ra {
2126 self.asm.mov_rr(d, a);
2127 }
2128 self.emit_shift_by_reg64(d, shift_src, 5);
2129 }
2130 }
2131 Opcode::SharR64 => {
2132 if let Args::ThreeReg { ra, rb, rd } = args {
2133 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2134 let shift_src = if *rd == *rb && *rd != *ra {
2135 self.asm.mov_rr(SCRATCH, b);
2136 SCRATCH
2137 } else {
2138 b
2139 };
2140 if *rd != *ra {
2141 self.asm.mov_rr(d, a);
2142 }
2143 self.emit_shift_by_reg64(d, shift_src, 7);
2144 }
2145 }
2146
2147 Opcode::MulUpperSS => {
2149 self.emit_mul_upper(args, true, true);
2150 }
2151 Opcode::MulUpperUU => {
2152 self.emit_mul_upper(args, false, false);
2153 }
2154 Opcode::MulUpperSU => {
2155 self.emit_mul_upper(args, true, false);
2156 }
2157
2158 Opcode::SetLtU => {
2160 if let Args::ThreeReg { ra, rb, rd } = args {
2161 let fused = if let Some((add_d, add_a, add_b)) = self.last_add_cf {
2166 if *ra == add_d
2172 && *rb != add_d
2173 && (*rb == add_a || *rb == add_b)
2174 && *rd != *rb
2175 {
2176 let d = REG_MAP[*rd];
2177 self.asm.setcc(Cc::B, d);
2181 self.asm.movzx_8_64(d, d);
2182 true
2183 } else {
2184 false
2185 }
2186 } else {
2187 false
2188 };
2189 if !fused {
2190 self.emit_setcc_3reg(*ra, *rb, *rd, Cc::B);
2191 }
2192 }
2193 }
2194 Opcode::SetLtS => {
2195 if let Args::ThreeReg { ra, rb, rd } = args {
2196 self.emit_setcc_3reg(*ra, *rb, *rd, Cc::L);
2197 }
2198 }
2199
2200 Opcode::CmovIz => {
2202 if let Args::ThreeReg { ra, rb, rd } = args {
2203 self.asm.test_rr(REG_MAP[*rb], REG_MAP[*rb]);
2205 self.asm.cmovcc(Cc::E, REG_MAP[*rd], REG_MAP[*ra]);
2206 }
2207 }
2208 Opcode::CmovNz => {
2209 if let Args::ThreeReg { ra, rb, rd } = args {
2210 self.asm.test_rr(REG_MAP[*rb], REG_MAP[*rb]);
2211 self.asm.cmovcc(Cc::NE, REG_MAP[*rd], REG_MAP[*ra]);
2212 }
2213 }
2214
2215 Opcode::RotL64 => {
2217 if let Args::ThreeReg { ra, rb, rd } = args {
2218 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2219 let shift_src = if *rd == *rb && *rd != *ra {
2220 self.asm.mov_rr(SCRATCH, b);
2221 SCRATCH
2222 } else {
2223 b
2224 };
2225 if *rd != *ra {
2226 self.asm.mov_rr(d, a);
2227 }
2228 self.emit_shift_by_reg64(d, shift_src, 0); }
2230 }
2231 Opcode::RotL32 => {
2232 if let Args::ThreeReg { ra, rb, rd } = args {
2233 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2234 let shift_src = if *rd == *rb && *rd != *ra {
2235 self.asm.mov_rr(SCRATCH, b);
2236 SCRATCH
2237 } else {
2238 b
2239 };
2240 if *rd != *ra {
2241 self.asm.mov_rr(d, a);
2242 }
2243 self.asm.movzx_32_64(d, d);
2244 self.emit_shift_by_reg32(d, shift_src, 0);
2245 self.asm.movsxd(d, d);
2246 }
2247 }
2248 Opcode::RotR64 => {
2249 if let Args::ThreeReg { ra, rb, rd } = args {
2250 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2251 let shift_src = if *rd == *rb && *rd != *ra {
2252 self.asm.mov_rr(SCRATCH, b);
2253 SCRATCH
2254 } else {
2255 b
2256 };
2257 if *rd != *ra {
2258 self.asm.mov_rr(d, a);
2259 }
2260 self.emit_shift_by_reg64(d, shift_src, 1); }
2262 }
2263 Opcode::RotR32 => {
2264 if let Args::ThreeReg { ra, rb, rd } = args {
2265 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2266 let shift_src = if *rd == *rb && *rd != *ra {
2267 self.asm.mov_rr(SCRATCH, b);
2268 SCRATCH
2269 } else {
2270 b
2271 };
2272 if *rd != *ra {
2273 self.asm.mov_rr(d, a);
2274 }
2275 self.asm.movzx_32_64(d, d);
2276 self.emit_shift_by_reg32(d, shift_src, 1);
2277 self.asm.movsxd(d, d);
2278 }
2279 }
2280
2281 Opcode::AndInv => {
2283 if let Args::ThreeReg { ra, rb, rd } = args {
2284 self.asm.mov_rr(SCRATCH, REG_MAP[*rb]);
2286 self.asm.not64(SCRATCH);
2287 self.asm.mov_rr(REG_MAP[*rd], REG_MAP[*ra]);
2288 self.asm.and_rr(REG_MAP[*rd], SCRATCH);
2289 }
2290 }
2291 Opcode::OrInv => {
2292 if let Args::ThreeReg { ra, rb, rd } = args {
2293 self.asm.mov_rr(SCRATCH, REG_MAP[*rb]);
2295 self.asm.not64(SCRATCH);
2296 self.asm.mov_rr(REG_MAP[*rd], REG_MAP[*ra]);
2297 self.asm.or_rr(REG_MAP[*rd], SCRATCH);
2298 }
2299 }
2300 Opcode::Xnor => {
2301 if let Args::ThreeReg { ra, rb, rd } = args {
2302 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2304 if *rd == *rb && *rd != *ra {
2305 self.asm.mov_rr(SCRATCH, b);
2306 self.asm.mov_rr(d, a);
2307 self.asm.xor_rr(d, SCRATCH);
2308 } else {
2309 if *rd != *ra {
2310 self.asm.mov_rr(d, a);
2311 }
2312 self.asm.xor_rr(d, b);
2313 }
2314 self.asm.not64(REG_MAP[*rd]);
2315 }
2316 }
2317
2318 Opcode::Max => {
2320 if let Args::ThreeReg { ra, rb, rd } = args {
2321 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2322 self.asm.cmp_rr(a, b);
2323 if *rd == *rb && *rd != *ra {
2324 self.asm.mov_rr(SCRATCH, b);
2325 self.asm.mov_rr(d, a);
2326 self.asm.cmovcc(Cc::L, d, SCRATCH);
2327 } else {
2328 if *rd != *ra {
2329 self.asm.mov_rr(d, a);
2330 }
2331 self.asm.cmovcc(Cc::L, d, b);
2332 }
2333 }
2334 }
2335 Opcode::MaxU => {
2336 if let Args::ThreeReg { ra, rb, rd } = args {
2337 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2338 self.asm.cmp_rr(a, b);
2339 if *rd == *rb && *rd != *ra {
2340 self.asm.mov_rr(SCRATCH, b);
2341 self.asm.mov_rr(d, a);
2342 self.asm.cmovcc(Cc::B, d, SCRATCH);
2343 } else {
2344 if *rd != *ra {
2345 self.asm.mov_rr(d, a);
2346 }
2347 self.asm.cmovcc(Cc::B, d, b);
2348 }
2349 }
2350 }
2351 Opcode::Min => {
2352 if let Args::ThreeReg { ra, rb, rd } = args {
2353 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2354 self.asm.cmp_rr(a, b);
2355 if *rd == *rb && *rd != *ra {
2356 self.asm.mov_rr(SCRATCH, b);
2357 self.asm.mov_rr(d, a);
2358 self.asm.cmovcc(Cc::G, d, SCRATCH);
2359 } else {
2360 if *rd != *ra {
2361 self.asm.mov_rr(d, a);
2362 }
2363 self.asm.cmovcc(Cc::G, d, b);
2364 }
2365 }
2366 }
2367 Opcode::MinU => {
2368 if let Args::ThreeReg { ra, rb, rd } = args {
2369 let (d, a, b) = (REG_MAP[*rd], REG_MAP[*ra], REG_MAP[*rb]);
2370 self.asm.cmp_rr(a, b);
2371 if *rd == *rb && *rd != *ra {
2372 self.asm.mov_rr(SCRATCH, b);
2373 self.asm.mov_rr(d, a);
2374 self.asm.cmovcc(Cc::A, d, SCRATCH);
2375 } else {
2376 if *rd != *ra {
2377 self.asm.mov_rr(d, a);
2378 }
2379 self.asm.cmovcc(Cc::A, d, b);
2380 }
2381 }
2382 }
2383 }
2384 }
2385
2386 fn emit_static_branch(&mut self, target: u32, condition: bool, _fallthrough: u32, pc: u32) {
2390 if !condition {
2391 return;
2392 }
2393 if !self.is_basic_block_start(target) {
2394 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
2395 self.emit_exit(EXIT_PANIC, 0);
2396 return;
2397 }
2398 let label = self.label_for_pc(target);
2399 self.asm.jmp_label(label);
2400 }
2401
/// Emits an indirect jump through the jump table.
///
/// The jump address is `REG_MAP[ra] + imm` (low 32 bits of `imm`), reduced to
/// 32 bits. The emitted code branches to `self.panic_label` when the address
/// is zero, when the bit shifted out by the `shr 1` is set, when the derived
/// jump-table index is not below the value at `CTX_JT_LEN`, or when the table
/// entry does not name a position marked `1` in the array at `CTX_BB_STARTS`.
/// Otherwise it stores the target in `CTX_PC` and jumps to
/// `dispatch_table[target] + code_base`.
///
/// `SCRATCH` is clobbered; RAX is used as a temporary and restored on both
/// the success path and the late panic path.
fn emit_dynamic_jump(&mut self, ra: usize, imm: u64, pc: u32) {
    // Record the pc of the jumping instruction before any check can fail.
    self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
    // SCRATCH = register value (+ immediate), then narrowed to 32 bits.
    self.asm.mov_rr(SCRATCH, REG_MAP[ra]);
    if imm as i32 != 0 {
        self.asm.add_ri(SCRATCH, imm as i32);
    }
    self.asm.movzx_32_64(SCRATCH, SCRATCH);
    // A zero address goes to the panic label.
    self.asm.test_rr(SCRATCH, SCRATCH);
    self.asm.jcc_label(Cc::E, self.panic_label);

    // Halve the address; the carry flag after `shr 1` holds the bit shifted
    // out, so an odd address takes the panic branch. Index = address / 2 - 1.
    self.asm.shr_ri64(SCRATCH, 1);
    self.asm.jcc_label(Cc::B, self.panic_label);
    self.asm.sub_ri(SCRATCH, 1);

    // Bounds check: panic when jt_len <= index (unsigned).
    self.asm.cmp_mem32_rip_rel_r(CTX_JT_LEN, SCRATCH);
    self.asm.jcc_label(Cc::BE, self.panic_label);
    // RAX may hold a guest register (it is in REG_MAP), so preserve it while
    // it is used as a temporary below.
    self.asm.push(Reg::RAX);
    // Scale the index by 4 and load the 32-bit table entry into SCRATCH.
    self.asm.shl_ri64(SCRATCH, 2);
    self.asm.mov_load64_rip_rel(Reg::RAX, CTX_JT_PTR);
    self.asm.add_rr(Reg::RAX, SCRATCH);
    self.asm.mov_load32(SCRATCH, Reg::RAX, 0);
    // From here on RAX is on the stack, so failures need their own label that
    // pops it before joining the shared panic path.
    let djump_panic = self.asm.new_label();
    // Panic when bb_len <= target (unsigned).
    self.asm.cmp_mem32_rip_rel_r(CTX_BB_LEN, SCRATCH);
    self.asm.jcc_label(Cc::BE, djump_panic);
    // The byte at bb_starts[target] must equal 1.
    self.asm.mov_load64_rip_rel(Reg::RAX, CTX_BB_STARTS);
    self.asm.movzx_load8_sib(Reg::RAX, Reg::RAX, SCRATCH);
    self.asm.cmp_ri32(Reg::RAX, 1);
    self.asm.jcc_label(Cc::NE, djump_panic);

    // Native address = code_base + dispatch_table[target]
    // (presumably a sign-extended 32-bit offset per entry — confirm against
    // `movsxd_load_sib4`).
    self.asm.mov_load64_rip_rel(Reg::RAX, CTX_DISPATCH_TABLE);
    self.asm.movsxd_load_sib4(Reg::RAX, Reg::RAX, SCRATCH);
    self.asm.add_r64_mem_rip_rel(Reg::RAX, CTX_CODE_BASE);
    // Publish the new pc, then move the native address into SCRATCH so RAX
    // can be restored before the jump.
    self.asm.mov_store32_rip_rel(CTX_PC, SCRATCH);
    self.asm.mov_rr(SCRATCH, Reg::RAX);
    self.asm.pop(Reg::RAX);
    self.asm.jmp_reg(SCRATCH);
    // Late failure path: rebalance the stack, then take the shared panic exit.
    self.asm.bind_label(djump_panic);
    self.asm.pop(Reg::RAX);
    self.asm.jmp_label(self.panic_label);
}
2469
2470 fn emit_setcc_3reg(&mut self, ra: usize, rb: usize, rd: usize, cc: Cc) {
2473 let (a, b, d) = (REG_MAP[ra], REG_MAP[rb], REG_MAP[rd]);
2474 if rd != ra && rd != rb {
2475 self.asm.mov_ri64(d, 0); self.asm.cmp_rr(a, b);
2478 self.asm.setcc(cc, d);
2479 } else {
2480 self.asm.cmp_rr(a, b);
2481 self.asm.setcc(cc, d);
2482 self.asm.movzx_8_64(d, d);
2483 }
2484 }
2485
2486 fn emit_setcc_imm(&mut self, ra: usize, rb: usize, imm: u64, cc: Cc) {
2489 let (a, b) = (REG_MAP[ra], REG_MAP[rb]);
2490 if ra != rb {
2491 self.asm.mov_ri64(a, 0); self.emit_cmp_imm(b, imm);
2493 self.asm.setcc(cc, a);
2494 } else {
2495 self.emit_cmp_imm(b, imm);
2496 self.asm.setcc(cc, a);
2497 self.asm.movzx_8_64(a, a);
2498 }
2499 }
2500
2501 fn emit_cmp_imm(&mut self, reg: Reg, imm: u64) {
2503 let imm_i64 = imm as i64;
2504 if imm_i64 >= i32::MIN as i64 && imm_i64 <= i32::MAX as i64 {
2505 self.asm.cmp_ri(reg, imm_i64 as i32);
2506 } else {
2507 self.asm.mov_ri64(SCRATCH, imm);
2508 self.asm.cmp_rr(reg, SCRATCH);
2509 }
2510 }
2511
2512 fn emit_branch_imm(
2514 &mut self,
2515 reg: Reg,
2516 imm: u64,
2517 cc: Cc,
2518 target: u32,
2519 _fallthrough: u32,
2520 pc: u32,
2521 ) {
2522 if !self.is_basic_block_start(target) {
2523 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
2525 self.asm.mov_ri64(SCRATCH, imm);
2526 self.asm.cmp_rr(reg, SCRATCH);
2527 self.asm.jcc_label(cc, self.panic_label);
2528 return;
2529 }
2530 self.emit_cmp_imm(reg, imm);
2531 let label = self.label_for_pc(target);
2532 self.asm.jcc_label(cc, label);
2533 }
2534
2535 fn emit_branch_reg(&mut self, a: Reg, b: Reg, cc: Cc, target: u32, _fallthrough: u32, pc: u32) {
2537 if !self.is_basic_block_start(target) {
2538 self.asm.mov_store32_rip_rel_imm(CTX_PC, pc as i32);
2539 self.asm.cmp_rr(a, b);
2540 self.asm.jcc_label(cc, self.panic_label);
2541 return;
2542 }
2543 self.asm.cmp_rr(a, b);
2544 let label = self.label_for_pc(target);
2545 self.asm.jcc_label(cc, label);
2546 }
2547
2548 fn emit_shift_by_reg32(&mut self, dst: Reg, shift_reg: Reg, shift_op: u8) {
2551 if shift_reg == Reg::RCX {
2554 self.asm.shift_cl32(shift_op, dst);
2555 } else if dst == Reg::RCX {
2556 self.asm.push(shift_reg);
2558 self.asm.mov_rr(Reg::RCX, shift_reg);
2559 self.asm.pop(shift_reg); self.asm.mov_rr(SCRATCH, dst);
2564 self.asm.push(Reg::RCX);
2565 self.asm.mov_rr(Reg::RCX, shift_reg);
2566 self.asm.shift_cl32(shift_op, SCRATCH);
2567 self.asm.pop(Reg::RCX);
2568 self.asm.mov_rr(dst, SCRATCH);
2569 } else {
2570 self.asm.push(Reg::RCX);
2571 self.asm.mov_rr(Reg::RCX, shift_reg);
2572 self.asm.shift_cl32(shift_op, dst);
2573 self.asm.pop(Reg::RCX);
2574 }
2575 }
2576
2577 fn emit_shift_by_reg64(&mut self, dst: Reg, shift_reg: Reg, shift_op: u8) {
2578 if shift_reg == Reg::RCX {
2579 self.asm.shift_cl64(shift_op, dst);
2580 } else if dst == Reg::RCX {
2581 self.asm.mov_rr(SCRATCH, dst);
2582 self.asm.push(Reg::RCX);
2583 self.asm.mov_rr(Reg::RCX, shift_reg);
2584 self.asm.shift_cl64(shift_op, SCRATCH);
2585 self.asm.pop(Reg::RCX);
2586 self.asm.mov_rr(dst, SCRATCH);
2587 } else {
2588 self.asm.push(Reg::RCX);
2589 self.asm.mov_rr(Reg::RCX, shift_reg);
2590 self.asm.shift_cl64(shift_op, dst);
2591 self.asm.pop(Reg::RCX);
2592 }
2593 }
2594
2595 #[allow(dead_code)]
2597 fn emit_alu3_64(&mut self, args: &Args, op: impl FnOnce(&mut Assembler, Reg, Reg)) {
2598 self.emit_alu3_64_comm(args, false, op);
2599 }
2600
2601 fn emit_alu3_64_comm(
2605 &mut self,
2606 args: &Args,
2607 commutative: bool,
2608 op: impl FnOnce(&mut Assembler, Reg, Reg),
2609 ) {
2610 if let Args::ThreeReg { ra, rb, rd } = args {
2611 let d = REG_MAP[*rd];
2612 let a = REG_MAP[*ra];
2613 let b = REG_MAP[*rb];
2614 if *rd == *ra {
2615 op(&mut self.asm, d, b);
2616 } else if *rd == *rb && commutative {
2617 op(&mut self.asm, d, a);
2619 } else if *rd == *rb {
2620 self.asm.mov_rr(SCRATCH, b);
2621 self.asm.mov_rr(d, a);
2622 op(&mut self.asm, d, SCRATCH);
2623 } else {
2624 self.asm.mov_rr(d, a);
2625 op(&mut self.asm, d, b);
2626 }
2627 }
2628 }
2629
2630 fn emit_alu3_32(&mut self, args: &Args, op: impl FnOnce(&mut Assembler, Reg, Reg)) {
2632 if let Args::ThreeReg { ra, rb, rd } = args {
2633 let d = REG_MAP[*rd];
2634 let a = REG_MAP[*ra];
2635 let b = REG_MAP[*rb];
2636 if *rd == *ra {
2637 op(&mut self.asm, d, b);
2638 } else if *rd == *rb {
2639 self.asm.mov_rr(SCRATCH, b);
2640 self.asm.mov_rr(d, a);
2641 op(&mut self.asm, d, SCRATCH);
2642 } else {
2643 self.asm.mov_rr(d, a);
2644 op(&mut self.asm, d, b);
2645 }
2646 self.asm.movsxd(d, d);
2647 }
2648 }
2649
2650 fn emit_alu3_32_sub(&mut self, args: &Args) {
2651 if let Args::ThreeReg { ra, rb, rd } = args {
2652 let d = REG_MAP[*rd];
2653 let a = REG_MAP[*ra];
2654 let b = REG_MAP[*rb];
2655 if *rd == *ra {
2656 self.asm.sub_rr32(d, b);
2657 } else if *rd == *rb {
2658 self.asm.neg32(d);
2660 self.asm.add_rr32(d, a);
2661 } else {
2662 self.asm.mov_rr(d, a);
2663 self.asm.sub_rr32(d, b);
2664 }
2665 self.asm.movsxd(d, d);
2666 }
2667 }
2668
2669 fn emit_div(&mut self, args: &Args, signed: bool, remainder: bool, is_32bit: bool) {
2680 if let Args::ThreeReg { ra, rb, rd } = args {
2681 let a_reg = REG_MAP[*ra];
2682 let b_reg = REG_MAP[*rb];
2683 let d_reg = REG_MAP[*rd];
2684
2685 self.asm.test_rr(b_reg, b_reg);
2687 let nonzero = self.asm.new_label();
2688 let done = self.asm.new_label();
2689 self.asm.jcc_label(Cc::NE, nonzero);
2690
2691 if remainder {
2693 self.asm.mov_rr(d_reg, a_reg);
2694 } else {
2695 self.asm.mov_ri64(d_reg, u64::MAX);
2696 if is_32bit {
2697 self.asm.movsxd(d_reg, d_reg);
2698 }
2699 }
2700 self.asm.jmp_label(done);
2701
2702 self.asm.bind_label(nonzero);
2703
2704 if b_reg != Reg::RAX {
2705 self.emit_div_fast(a_reg, b_reg, d_reg, signed, remainder, is_32bit);
2708 } else {
2709 self.emit_div_b_is_rax(a_reg, d_reg, signed, remainder, is_32bit);
2712 }
2713
2714 if is_32bit {
2715 self.asm.movsxd(d_reg, d_reg);
2716 }
2717
2718 self.asm.bind_label(done);
2719 }
2720 }
2721
2722 fn emit_div_fast(
2725 &mut self,
2726 a_reg: Reg,
2727 b_reg: Reg,
2728 d_reg: Reg,
2729 signed: bool,
2730 remainder: bool,
2731 is_32bit: bool,
2732 ) {
2733 let save_rax = d_reg != Reg::RAX;
2734
2735 if save_rax {
2736 self.asm.push(Reg::RAX);
2737 }
2738
2739 if a_reg != Reg::RAX {
2741 self.asm.mov_rr(Reg::RAX, a_reg);
2742 }
2743
2744 self.emit_div_setup_and_exec(signed, is_32bit, b_reg);
2746
2747 if save_rax {
2748 let result_reg = if remainder { SCRATCH } else { Reg::RAX };
2750 self.asm.mov_rr(d_reg, result_reg);
2751 self.asm.pop(Reg::RAX);
2752 } else {
2753 if remainder {
2755 self.asm.mov_rr(Reg::RAX, SCRATCH);
2756 }
2757 }
2758 }
2759
2760 fn emit_div_b_is_rax(
2763 &mut self,
2764 a_reg: Reg,
2765 d_reg: Reg,
2766 signed: bool,
2767 remainder: bool,
2768 is_32bit: bool,
2769 ) {
2770 self.asm.push(Reg::RAX); self.asm.push(Reg::RCX); self.asm.mov_rr(Reg::RCX, Reg::RAX);
2778
2779 if a_reg == Reg::RAX {
2781 } else if a_reg == Reg::RCX {
2784 self.asm.mov_load64(Reg::RAX, Reg::RSP, 0); } else {
2787 self.asm.mov_rr(Reg::RAX, a_reg);
2788 }
2789
2790 self.emit_div_setup_and_exec(signed, is_32bit, Reg::RCX);
2792
2793 let result_reg = if remainder { SCRATCH } else { Reg::RAX };
2795
2796 if d_reg == Reg::RAX {
2797 if remainder {
2798 self.asm.mov_rr(Reg::RAX, SCRATCH);
2799 }
2800 self.asm.pop(Reg::RCX); self.asm.pop(SCRATCH); } else if d_reg == Reg::RCX {
2803 self.asm.mov_rr(Reg::RCX, result_reg);
2804 self.asm.pop(SCRATCH); self.asm.pop(Reg::RAX); } else {
2807 self.asm.mov_rr(d_reg, result_reg);
2808 self.asm.pop(Reg::RCX); self.asm.pop(Reg::RAX); }
2811 }
2812
2813 fn emit_div_setup_and_exec(&mut self, signed: bool, is_32bit: bool, divisor: Reg) {
2815 if is_32bit {
2816 if signed {
2817 self.asm.movsxd(Reg::RAX, Reg::RAX);
2818 self.asm.cdq();
2819 self.asm.idiv32(divisor);
2820 } else {
2821 self.asm.movzx_32_64(Reg::RAX, Reg::RAX);
2822 self.asm.mov_ri64(SCRATCH, 0);
2823 self.asm.div32(divisor);
2824 }
2825 } else if signed {
2826 self.asm.cqo();
2827 self.asm.idiv64(divisor);
2828 } else {
2829 self.asm.mov_ri64(SCRATCH, 0);
2830 self.asm.div64(divisor);
2831 }
2832 }
2833
2834 fn emit_mul_upper(&mut self, args: &Args, a_signed: bool, b_signed: bool) {
2839 if let Args::ThreeReg { ra, rb, rd } = args {
2840 let d_reg = REG_MAP[*rd];
2841 let rb_is_rax = REG_MAP[*rb] == Reg::RAX;
2842 let save_rax = d_reg != Reg::RAX || rb_is_rax;
2845
2846 if save_rax {
2847 self.asm.push(Reg::RAX); }
2849
2850 if REG_MAP[*ra] != Reg::RAX {
2852 self.asm.mov_rr(Reg::RAX, REG_MAP[*ra]);
2853 }
2854
2855 let mul_src = if rb_is_rax {
2857 self.asm.mov_load64(SCRATCH, Reg::RSP, 0);
2859 SCRATCH
2860 } else {
2861 REG_MAP[*rb]
2862 };
2863
2864 if a_signed && b_signed {
2865 self.asm.imul_rdx_rax(mul_src);
2866 } else if !a_signed && !b_signed {
2867 self.asm.mul_rdx_rax(mul_src);
2868 } else {
2869 self.asm.push(mul_src); self.asm.push(Reg::RAX); if rb_is_rax {
2874 self.asm.mov_load64(SCRATCH, Reg::RSP, 16);
2877 self.asm.mul_rdx_rax(SCRATCH);
2878 } else {
2879 self.asm.mul_rdx_rax(mul_src);
2880 }
2881 self.asm.pop(Reg::RAX); let skip = self.asm.new_label();
2884 self.asm.test_rr(Reg::RAX, Reg::RAX);
2885 self.asm.jcc_label(Cc::NS, skip);
2886 self.asm.pop(Reg::RAX); self.asm.sub_rr(SCRATCH, Reg::RAX);
2889 let done = self.asm.new_label();
2890 self.asm.jmp_label(done);
2891 self.asm.bind_label(skip);
2892 self.asm.add_ri(Reg::RSP, 8); self.asm.bind_label(done);
2894 }
2895
2896 if save_rax {
2898 if d_reg == Reg::RAX {
2899 self.asm.add_ri(Reg::RSP, 8);
2902 self.asm.mov_rr(Reg::RAX, SCRATCH);
2903 } else {
2904 self.asm.mov_rr(d_reg, SCRATCH);
2905 self.asm.pop(Reg::RAX); }
2907 } else {
2908 self.asm.mov_rr(Reg::RAX, SCRATCH);
2910 }
2911 }
2912 }
2913
2914 fn emit_exit(&mut self, reason: u32, arg: u32) {
2916 self.asm
2917 .mov_store32_rip_rel_imm(CTX_EXIT_REASON, reason as i32);
2918 self.asm.mov_store32_rip_rel_imm(CTX_EXIT_ARG, arg as i32);
2919 self.asm.jmp_label(self.exit_label);
2920 }
2921
2922 fn emit_prologue(&mut self) {
2925 self.asm.ensure_capacity(512); self.asm.push(Reg::RBX);
2928 self.asm.push(Reg::RBP);
2929 self.asm.push(Reg::R12);
2930 self.asm.push(Reg::R13);
2931 self.asm.push(Reg::R14);
2932 self.asm.push(Reg::R15);
2933
2934 self.asm.push(SCRATCH); self.asm.mov_load64_rip_rel(GAS, CTX_GAS);
2945
2946 self.asm.mov_store32_rip_rel_imm(CTX_EXIT_REASON, 0);
2948
2949 self.asm.mov_load32_rip_rel(SCRATCH, CTX_ENTRY_PC);
2951 self.asm.mov_load64_rip_rel(Reg::RAX, CTX_DISPATCH_TABLE);
2952 self.asm.movsxd_load_sib4(Reg::RAX, Reg::RAX, SCRATCH);
2953 self.asm.mov_load64_rip_rel(SCRATCH, CTX_CODE_BASE);
2954 self.asm.add_rr(Reg::RAX, SCRATCH);
2955 self.asm.push(Reg::RAX);
2956
2957 for (i, ®) in REG_MAP.iter().enumerate() {
2959 self.asm.mov_load64_rip_rel(reg, CTX_REGS + (i as u64) * 8);
2960 }
2961
2962 self.asm.pop(SCRATCH);
2964 self.asm.jmp_reg(SCRATCH);
2965 }
2966
2967 fn emit_exit_sequences(&mut self) {
2969 let needed = 512 + self.oog_stubs.len() * 16;
2972 self.asm.ensure_capacity(needed);
2973 self.asm.bind_label(self.oog_pc_label);
2976 self.asm.mov_store32_rip_rel(CTX_PC, SCRATCH);
2977 self.asm.bind_label(self.oog_label);
2979 self.asm
2980 .mov_store32_rip_rel_imm(CTX_EXIT_REASON, EXIT_OOG as i32);
2981 self.asm.jmp_label(self.exit_label);
2982
2983 let stubs = core::mem::take(&mut self.oog_stubs);
2986 for (label, pvm_pc, _cost) in &stubs {
2987 self.asm.bind_label(*label);
2988 self.asm.mov_ri32(SCRATCH, *pvm_pc);
2989 self.asm.jmp_label(self.oog_pc_label);
2990 }
2991
2992 self.asm.bind_label(self.panic_label);
2996 self.asm
2997 .mov_store32_rip_rel_imm(CTX_EXIT_REASON, EXIT_PANIC as i32);
2998 self.asm.bind_label(self.exit_label);
3002 self.asm.mov_store64_rip_rel(CTX_GAS, GAS);
3003 for (i, ®) in REG_MAP.iter().enumerate() {
3004 self.asm.mov_store64_rip_rel(CTX_REGS + (i as u64) * 8, reg);
3005 }
3006
3007 self.asm.pop(SCRATCH); self.asm.pop(Reg::R15);
3010 self.asm.pop(Reg::R14);
3011 self.asm.pop(Reg::R13);
3012 self.asm.pop(Reg::R12);
3013 self.asm.pop(Reg::RBP);
3014 self.asm.pop(Reg::RBX);
3015 self.asm.ret();
3016 }
3017
3018 fn read_fn_for(&self, opcode: Opcode) -> u64 {
3020 match opcode {
3021 Opcode::LoadU8 | Opcode::LoadI8 | Opcode::LoadIndU8 | Opcode::LoadIndI8 => {
3022 self.helpers.mem_read_u8
3023 }
3024 Opcode::LoadU16 | Opcode::LoadI16 | Opcode::LoadIndU16 | Opcode::LoadIndI16 => {
3025 self.helpers.mem_read_u16
3026 }
3027 Opcode::LoadU32 | Opcode::LoadI32 | Opcode::LoadIndU32 | Opcode::LoadIndI32 => {
3028 self.helpers.mem_read_u32
3029 }
3030 Opcode::LoadU64 | Opcode::LoadIndU64 => self.helpers.mem_read_u64,
3031 _ => self.helpers.mem_read_u8,
3032 }
3033 }
3034
3035 fn write_fn_for(&self, opcode: Opcode) -> u64 {
3037 match opcode {
3038 Opcode::StoreU8 | Opcode::StoreIndU8 => self.helpers.mem_write_u8,
3039 Opcode::StoreU16 | Opcode::StoreIndU16 => self.helpers.mem_write_u16,
3040 Opcode::StoreU32 | Opcode::StoreIndU32 => self.helpers.mem_write_u32,
3041 Opcode::StoreU64 | Opcode::StoreIndU64 => self.helpers.mem_write_u64,
3042 _ => self.helpers.mem_write_u8,
3043 }
3044 }
3045}