1use alloc::vec;
16use alloc::vec::Vec;
17
/// Fixed-size staging area for the bytes of a single instruction.
///
/// Bytes are packed into a `u128` in push order, the first byte pushed landing
/// in the lowest 8 bits. `length` counts the number of *bits* written so far.
#[derive(Clone, Copy)]
pub struct InstBuf {
    out: u128,
    length: u32,
}

impl Default for InstBuf {
    /// Returns an empty buffer (same as [`InstBuf::new`]).
    fn default() -> Self {
        InstBuf { out: 0, length: 0 }
    }
}

impl InstBuf {
    /// Creates an empty buffer.
    #[inline(always)]
    pub fn new() -> Self {
        InstBuf { out: 0, length: 0 }
    }

    /// Appends one byte.
    #[inline(always)]
    pub fn push(&mut self, byte: u8) {
        let widened = u128::from(byte);
        self.out |= widened << self.length;
        self.length += 8;
    }

    /// Appends a 32-bit value, least-significant byte first.
    #[inline(always)]
    pub fn push_u32(&mut self, v: u32) {
        let widened = u128::from(v);
        self.out |= widened << self.length;
        self.length += 32;
    }

    /// Appends a 64-bit value, least-significant byte first.
    #[inline(always)]
    pub fn push_u64(&mut self, v: u64) {
        let widened = u128::from(v);
        self.out |= widened << self.length;
        self.length += 64;
    }

    /// Appends a signed 32-bit value using its two's-complement bit pattern.
    #[inline(always)]
    pub fn push_i32(&mut self, v: i32) {
        let bits = v as u32;
        self.out |= u128::from(bits) << self.length;
        self.length += 32;
    }

    /// Number of whole bytes pushed so far.
    #[inline(always)]
    pub fn len(&self) -> usize {
        (self.length / 8) as usize
    }

    /// `true` when nothing has been pushed yet.
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
}
70
/// The sixteen 64-bit general-purpose registers, numbered by their hardware encoding.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Reg {
    RAX = 0,
    RCX = 1,
    RDX = 2,
    RBX = 3,
    RSP = 4,
    RBP = 5,
    RSI = 6,
    RDI = 7,
    R8 = 8,
    R9 = 9,
    R10 = 10,
    R11 = 11,
    R12 = 12,
    R13 = 13,
    R14 = 14,
    R15 = 15,
}

impl Reg {
    /// Low three bits of the register number (the part stored in ModRM/SIB fields).
    fn lo(self) -> u8 {
        (self as u8) % 8
    }

    /// Fourth bit of the register number (0 or 1), the part carried by a REX prefix bit.
    fn hi(self) -> u8 {
        u8::from(self.needs_rex())
    }

    /// `true` for R8..R15, whose encoding does not fit in three bits.
    fn needs_rex(self) -> bool {
        (self as u8) > 7
    }
}
107
/// Condition codes; names and numbering follow the x86 condition-code encoding.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Cc {
    /// Overflow.
    O = 0,
    /// No overflow.
    NO = 1,
    /// Below (unsigned).
    B = 2,
    /// Above or equal (unsigned).
    AE = 3,
    /// Equal.
    E = 4,
    /// Not equal.
    NE = 5,
    /// Below or equal (unsigned).
    BE = 6,
    /// Above (unsigned).
    A = 7,
    /// Sign.
    S = 8,
    /// No sign.
    NS = 9,
    /// Parity.
    P = 10,
    /// No parity.
    NP = 11,
    /// Less (signed).
    L = 12,
    /// Greater or equal (signed).
    GE = 13,
    /// Less or equal (signed).
    LE = 14,
    /// Greater (signed).
    G = 15,
}
129
/// Handle to a jump target. The wrapped value is the label's index into the
/// assembler's label table (see `Assembler::new_label` / `Assembler::bind_label`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Label(pub u32);
133
/// A 4-byte placeholder that was emitted for a label that was not yet bound.
#[derive(Clone, Copy)]
struct Fixup {
    /// Code offset of the 4-byte placeholder (the write position when it was emitted).
    offset: usize,
    /// The label the placeholder refers to.
    label: Label,
}
142
/// Owner of the memory the assembler writes machine code into.
enum CodeBuf {
    /// Heap storage held by a `Vec<u8>`; the assembler writes into its spare
    /// capacity through a raw pointer rather than through `Vec` methods.
    Vec(Vec<u8>),
}
149
/// x86-64 machine-code emitter writing into a growable byte buffer.
pub struct Assembler {
    /// Owns the backing storage that `buf` points into.
    code_buf: CodeBuf,
    /// Raw pointer to the first byte of the backing storage.
    buf: *mut u8,
    /// Number of bytes emitted so far; also the offset of the next write.
    write_pos: usize,
    /// Capacity, in bytes, of the storage behind `buf`.
    capacity: usize,
    /// Per-label slot: `LABEL_UNBOUND` or the bound code offset plus one.
    labels: Vec<u32>,
    /// Number of labels handed out; may be smaller than `labels.len()` when
    /// the table was pre-sized by `with_capacity`.
    labels_len: usize,
    /// Placeholders emitted for labels that were unbound at the time of use.
    fixups: Vec<Fixup>,
    /// Virtual address assumed for offset 0 of the code when computing
    /// RIP-relative displacements.
    jit_va_base: u64,
}
174
/// Label-table value meaning "not bound yet". Bound labels store their code
/// offset plus one, so zero never collides with a real position.
const LABEL_UNBOUND: u32 = 0;
179
180impl Default for Assembler {
181 fn default() -> Self {
182 Self::new()
183 }
184}
185
186impl Assembler {
187 pub fn new() -> Self {
188 let mut code = Vec::with_capacity(4096);
189 let buf = code.as_mut_ptr();
190 let capacity = code.capacity();
191 Self {
192 code_buf: CodeBuf::Vec(code),
193 buf,
194 write_pos: 0,
195 capacity,
196 labels: Vec::new(),
197 labels_len: 0,
198 fixups: Vec::new(),
199 jit_va_base: 0,
200 }
201 }
202
203 pub fn with_capacity(code_capacity: usize, label_capacity: usize) -> Self {
206 let mut code = Vec::with_capacity(code_capacity);
207 let buf = code.as_mut_ptr();
208 let capacity = code.capacity();
209 Self {
210 code_buf: CodeBuf::Vec(code),
211 buf,
212 write_pos: 0,
213 capacity,
214 labels: vec![0u32; label_capacity],
216 labels_len: 0,
217 fixups: Vec::with_capacity(2048),
218 jit_va_base: 0,
219 }
220 }
221
    /// Sets the virtual address that code offset 0 is assumed to live at.
    /// The value is used when computing RIP-relative displacements.
    pub fn set_jit_va_base(&mut self, va: u64) {
        self.jit_va_base = va;
    }
227
228 #[cold]
232 fn grow(&mut self, additional: usize) {
233 let CodeBuf::Vec(code) = &mut self.code_buf;
234 unsafe {
237 code.set_len(self.write_pos);
238 }
239 code.reserve(additional);
240 self.buf = code.as_mut_ptr();
241 self.capacity = code.capacity();
242 unsafe {
245 code.set_len(0);
246 }
247 }
248
249 #[inline(always)]
251 pub fn ensure_capacity(&mut self, n: usize) {
252 if self.write_pos + n > self.capacity {
253 self.grow(n);
254 }
255 }
256
257 pub fn new_label(&mut self) -> Label {
259 let id = self.labels_len as u32;
260 self.labels_len += 1;
261 if self.labels_len > self.labels.len() {
263 self.labels.push(LABEL_UNBOUND);
264 }
265 Label(id)
266 }
267
    /// Number of labels handed out so far.
    pub fn labels_len(&self) -> usize {
        self.labels_len
    }
272
273 pub fn bulk_create_labels(&mut self, count: usize) {
276 self.labels_len += count;
277 if self.labels_len > self.labels.len() {
279 self.labels.resize(self.labels_len, LABEL_UNBOUND);
280 }
281 }
282
283 pub fn bind_label(&mut self, label: Label) {
285 self.labels[label.0 as usize] = (self.write_pos + 1) as u32; }
287
    /// Current write position, i.e. the number of bytes emitted so far.
    pub fn offset(&self) -> usize {
        self.write_pos
    }
292
293 pub fn patch_i32(&mut self, offset: usize, value: i32) {
295 debug_assert!(offset + 4 <= self.write_pos);
296 unsafe {
298 core::ptr::copy_nonoverlapping(value.to_le_bytes().as_ptr(), self.buf.add(offset), 4);
299 }
300 }
301
302 #[inline(always)]
309 fn emit(&mut self, b: u8) {
310 debug_assert!(self.write_pos < self.capacity);
311 unsafe {
314 *self.buf.add(self.write_pos) = b;
315 }
316 self.write_pos += 1;
317 }
318
319 #[inline(always)]
321 fn emit3(&mut self, a: u8, b: u8, c: u8) {
322 debug_assert!(self.write_pos + 3 <= self.capacity);
323 unsafe {
326 let p = self.buf.add(self.write_pos);
327 *p = a;
328 *p.add(1) = b;
329 *p.add(2) = c;
330 }
331 self.write_pos += 3;
332 }
333
334 #[inline(always)]
336 fn flush_instbuf(&mut self, ib: InstBuf) {
337 let len = ib.len();
338 debug_assert!(self.write_pos + len <= self.capacity);
339 unsafe {
344 let p = self.buf.add(self.write_pos);
345 core::ptr::write_unaligned(p as *mut u64, ib.out as u64);
346 core::ptr::write_unaligned(p.add(8) as *mut u64, (ib.out >> 64) as u64);
347 }
348 self.write_pos += len;
349 }
350
351 #[inline(always)]
352 fn emit_u32(&mut self, v: u32) {
353 debug_assert!(self.write_pos + 4 <= self.capacity);
354 unsafe {
357 core::ptr::write_unaligned(self.buf.add(self.write_pos) as *mut u32, v.to_le());
358 }
359 self.write_pos += 4;
360 }
361
362 #[inline(always)]
363 #[allow(dead_code)]
364 fn emit_u64(&mut self, v: u64) {
365 debug_assert!(self.write_pos + 8 <= self.capacity);
366 unsafe {
368 core::ptr::write_unaligned(self.buf.add(self.write_pos) as *mut u64, v.to_le());
369 }
370 self.write_pos += 8;
371 }
372
373 #[inline(always)]
374 fn emit_i32(&mut self, v: i32) {
375 debug_assert!(self.write_pos + 4 <= self.capacity);
376 unsafe {
379 core::ptr::write_unaligned(
380 self.buf.add(self.write_pos) as *mut u32,
381 v.to_le_bytes().as_ptr().cast::<u32>().read(),
382 );
383 }
384 self.write_pos += 4;
385 }
386
387 fn emit_label_fixup(&mut self, label: Label) {
391 let bound = self.labels[label.0 as usize];
392 if bound != LABEL_UNBOUND {
393 let target = (bound - 1) as i64;
396 let rel = target - (self.write_pos as i64 + 4);
397 self.emit_i32(rel as i32);
398 } else {
399 let offset = self.write_pos;
401 self.fixups.push(Fixup { offset, label });
402 self.emit_u32(0); }
404 }
405
406 #[allow(dead_code)]
410 fn rex_w(&mut self, reg: Reg, rm: Reg) {
411 self.emit(0x48 | (reg.hi() << 2) | rm.hi());
412 }
413
414 fn rex_w_b(&mut self, rm: Reg) {
416 self.emit(0x48 | rm.hi());
417 }
418
419 #[allow(dead_code)]
421 fn rex_opt(&mut self, reg: Reg, rm: Reg) {
422 let r = reg.hi();
423 let b = rm.hi();
424 if r != 0 || b != 0 {
425 self.emit(0x40 | (r << 2) | b);
426 }
427 }
428
429 fn rex_opt_b(&mut self, rm: Reg) {
430 if rm.needs_rex() {
431 self.emit(0x40 | rm.hi());
432 }
433 }
434
435 #[allow(dead_code)]
437 fn modrm_rr(&mut self, reg: Reg, rm: Reg) {
438 self.emit(0xC0 | (reg.lo() << 3) | rm.lo());
439 }
440
441 #[inline(always)]
444 fn modrm_disp_ib(ib: &mut InstBuf, reg: u8, base: Reg, disp: i32) {
445 let bl = base.lo();
446 let needs_sib = bl == 4;
447
448 if disp == 0 && bl != 5 {
449 if needs_sib {
450 ib.push((reg << 3) | 4);
451 ib.push(0x24);
452 } else {
453 ib.push((reg << 3) | bl);
454 }
455 } else if (-128..=127).contains(&disp) {
456 if needs_sib {
457 ib.push(0x40 | (reg << 3) | 4);
458 ib.push(0x24);
459 } else {
460 ib.push(0x40 | (reg << 3) | bl);
461 }
462 ib.push(disp as u8);
463 } else {
464 if needs_sib {
465 ib.push(0x80 | (reg << 3) | 4);
466 ib.push(0x24);
467 } else {
468 ib.push(0x80 | (reg << 3) | bl);
469 }
470 ib.push_i32(disp);
471 }
472 }
473
474 #[allow(dead_code)]
476 fn modrm_disp(&mut self, reg: u8, base: Reg, disp: i32) {
477 let mut ib = InstBuf::new();
478 Self::modrm_disp_ib(&mut ib, reg, base, disp);
479 self.flush_instbuf(ib);
480 }
481
482 #[inline(always)]
484 fn modrm_sib_base_index_ib(ib: &mut InstBuf, reg: u8, base: Reg, index: Reg) {
485 if base.lo() == 5 {
486 ib.push(0x44 | (reg << 3));
487 ib.push((index.lo() << 3) | base.lo());
488 ib.push(0);
489 } else {
490 ib.push((reg << 3) | 4);
491 ib.push((index.lo() << 3) | base.lo());
492 }
493 }
494
495 fn modrm_disp32(&mut self, reg: u8, base: Reg, disp: i32) {
499 let mut ib = InstBuf::new();
500 if base.lo() == 4 {
501 ib.push(0x80 | (reg << 3) | 4);
502 ib.push(0x24);
503 } else {
504 ib.push(0x80 | (reg << 3) | base.lo());
505 }
506 ib.push_i32(disp);
507 self.flush_instbuf(ib);
508 }
509
510 #[inline(always)]
516 pub fn mov_rr(&mut self, dst: Reg, src: Reg) {
517 if dst == src {
518 return;
519 }
520 self.emit3(
521 0x48 | (src.hi() << 2) | dst.hi(),
522 0x89,
523 0xC0 | (src.lo() << 3) | dst.lo(),
524 );
525 }
526
527 #[inline(always)]
529 pub fn mov_ri64(&mut self, dst: Reg, imm: u64) {
530 let mut ib = InstBuf::new();
531 if imm == 0 {
532 let r = dst.hi();
534 if r != 0 {
535 ib.push(0x40 | (r << 2) | r);
536 }
537 ib.push(0x31);
538 ib.push(0xC0 | (dst.lo() << 3) | dst.lo());
539 } else if imm <= u32::MAX as u64 {
540 if dst.needs_rex() {
542 ib.push(0x40 | dst.hi());
543 }
544 ib.push(0xB8 + dst.lo());
545 ib.push_u32(imm as u32);
546 } else if imm as i64 >= i32::MIN as i64 && imm as i64 <= i32::MAX as i64 {
547 ib.push(0x48 | dst.hi());
549 ib.push(0xC7);
550 ib.push(0xC0 | dst.lo());
551 ib.push_i32(imm as i32);
552 } else {
553 ib.push(0x48 | dst.hi());
555 ib.push(0xB8 + dst.lo());
556 ib.push_u64(imm);
557 }
558 self.flush_instbuf(ib);
559 }
560
561 #[inline(always)]
563 pub fn mov_ri32(&mut self, dst: Reg, imm: u32) {
564 let mut ib = InstBuf::new();
565 if dst.needs_rex() {
566 ib.push(0x40 | dst.hi());
567 }
568 ib.push(0xB8 + dst.lo());
569 ib.push_u32(imm);
570 self.flush_instbuf(ib);
571 }
572
573 #[inline(always)]
575 pub fn mov_load32(&mut self, dst: Reg, base: Reg, disp: i32) {
576 let mut ib = InstBuf::new();
577 let r = dst.hi();
578 let b = base.hi();
579 if r != 0 || b != 0 {
580 ib.push(0x40 | (r << 2) | b);
581 }
582 ib.push(0x8B);
583 Self::modrm_disp_ib(&mut ib, dst.lo(), base, disp);
584 self.flush_instbuf(ib);
585 }
586
587 #[inline(always)]
589 pub fn mov_load64(&mut self, dst: Reg, base: Reg, disp: i32) {
590 let mut ib = InstBuf::new();
591 ib.push(0x48 | (dst.hi() << 2) | base.hi());
592 ib.push(0x8B);
593 Self::modrm_disp_ib(&mut ib, dst.lo(), base, disp);
594 self.flush_instbuf(ib);
595 }
596
597 pub fn movsxd_load_sib4(&mut self, dst: Reg, base: Reg, index: Reg) {
599 let mut ib = InstBuf::new();
600 ib.push(0x48 | (dst.hi() << 2) | (index.hi() << 1) | base.hi());
601 ib.push(0x63);
602 ib.push((dst.lo() << 3) | 4);
603 ib.push(0x80 | (index.lo() << 3) | base.lo());
604 self.flush_instbuf(ib);
605 }
606
607 #[inline(always)]
609 pub fn mov_store32(&mut self, base: Reg, disp: i32, src: Reg) {
610 let mut ib = InstBuf::new();
611 let r = src.hi();
612 let b = base.hi();
613 if r != 0 || b != 0 {
614 ib.push(0x40 | (r << 2) | b);
615 }
616 ib.push(0x89);
617 Self::modrm_disp_ib(&mut ib, src.lo(), base, disp);
618 self.flush_instbuf(ib);
619 }
620
621 #[inline(always)]
623 pub fn mov_store64(&mut self, base: Reg, disp: i32, src: Reg) {
624 let mut ib = InstBuf::new();
625 ib.push(0x48 | (src.hi() << 2) | base.hi());
626 ib.push(0x89);
627 Self::modrm_disp_ib(&mut ib, src.lo(), base, disp);
628 self.flush_instbuf(ib);
629 }
630
631 #[inline(always)]
633 pub fn mov_store32_imm(&mut self, base: Reg, disp: i32, imm: i32) {
634 let mut ib = InstBuf::new();
635 if base.needs_rex() {
636 ib.push(0x40 | base.hi());
637 }
638 ib.push(0xC7);
639 Self::modrm_disp_ib(&mut ib, 0, base, disp);
640 ib.push_i32(imm);
641 self.flush_instbuf(ib);
642 }
643
644 pub fn mov_store64_imm(&mut self, base: Reg, disp: i32, imm: i32) {
646 let mut ib = InstBuf::new();
647 ib.push(0x48 | base.hi());
648 ib.push(0xC7);
649 Self::modrm_disp_ib(&mut ib, 0, base, disp);
650 ib.push_i32(imm);
651 self.flush_instbuf(ib);
652 }
653
654 #[allow(dead_code)]
660 fn modrm_sib_base_index(&mut self, reg: u8, base: Reg, index: Reg) {
661 let mut ib = InstBuf::new();
662 Self::modrm_sib_base_index_ib(&mut ib, reg, base, index);
663 self.flush_instbuf(ib);
664 }
665
666 pub fn movzx_load8_sib(&mut self, dst: Reg, base: Reg, index: Reg) {
668 let mut ib = InstBuf::new();
669 ib.push(0x48 | (dst.hi() << 2) | (index.hi() << 1) | base.hi());
670 ib.push(0x0F);
671 ib.push(0xB6);
672 Self::modrm_sib_base_index_ib(&mut ib, dst.lo(), base, index);
673 self.flush_instbuf(ib);
674 }
675
676 pub fn movzx_load16_sib(&mut self, dst: Reg, base: Reg, index: Reg) {
678 let mut ib = InstBuf::new();
679 let rex = 0x40 | (dst.hi() << 2) | (index.hi() << 1) | base.hi();
680 if rex != 0x40 {
681 ib.push(rex);
682 }
683 ib.push(0x0F);
684 ib.push(0xB7);
685 Self::modrm_sib_base_index_ib(&mut ib, dst.lo(), base, index);
686 self.flush_instbuf(ib);
687 }
688
689 pub fn mov_load32_sib(&mut self, dst: Reg, base: Reg, index: Reg) {
691 let mut ib = InstBuf::new();
692 let rex = 0x40 | (dst.hi() << 2) | (index.hi() << 1) | base.hi();
693 if rex != 0x40 {
694 ib.push(rex);
695 }
696 ib.push(0x8B);
697 Self::modrm_sib_base_index_ib(&mut ib, dst.lo(), base, index);
698 self.flush_instbuf(ib);
699 }
700
701 pub fn mov_load64_sib(&mut self, dst: Reg, base: Reg, index: Reg) {
703 let mut ib = InstBuf::new();
704 ib.push(0x48 | (dst.hi() << 2) | (index.hi() << 1) | base.hi());
705 ib.push(0x8B);
706 Self::modrm_sib_base_index_ib(&mut ib, dst.lo(), base, index);
707 self.flush_instbuf(ib);
708 }
709
710 pub fn mov_store8_sib(&mut self, base: Reg, index: Reg, src: Reg) {
712 let mut ib = InstBuf::new();
713 ib.push(0x40 | (src.hi() << 2) | (index.hi() << 1) | base.hi());
714 ib.push(0x88);
715 Self::modrm_sib_base_index_ib(&mut ib, src.lo(), base, index);
716 self.flush_instbuf(ib);
717 }
718
719 pub fn mov_store16_sib(&mut self, base: Reg, index: Reg, src: Reg) {
721 let mut ib = InstBuf::new();
722 ib.push(0x66);
723 let rex = 0x40 | (src.hi() << 2) | (index.hi() << 1) | base.hi();
724 if rex != 0x40 {
725 ib.push(rex);
726 }
727 ib.push(0x89);
728 Self::modrm_sib_base_index_ib(&mut ib, src.lo(), base, index);
729 self.flush_instbuf(ib);
730 }
731
732 pub fn mov_store32_sib(&mut self, base: Reg, index: Reg, src: Reg) {
734 let mut ib = InstBuf::new();
735 let rex = 0x40 | (src.hi() << 2) | (index.hi() << 1) | base.hi();
736 if rex != 0x40 {
737 ib.push(rex);
738 }
739 ib.push(0x89);
740 Self::modrm_sib_base_index_ib(&mut ib, src.lo(), base, index);
741 self.flush_instbuf(ib);
742 }
743
744 pub fn mov_store64_sib(&mut self, base: Reg, index: Reg, src: Reg) {
746 let mut ib = InstBuf::new();
747 ib.push(0x48 | (src.hi() << 2) | (index.hi() << 1) | base.hi());
748 ib.push(0x89);
749 Self::modrm_sib_base_index_ib(&mut ib, src.lo(), base, index);
750 self.flush_instbuf(ib);
751 }
752
753 pub fn mov_store32_sib_imm(&mut self, base: Reg, index: Reg, imm: i32) {
755 let mut ib = InstBuf::new();
756 let rex = 0x40 | (index.hi() << 1) | base.hi();
757 if rex != 0x40 {
758 ib.push(rex);
759 }
760 ib.push(0xC7);
761 Self::modrm_sib_base_index_ib(&mut ib, 0, base, index);
762 ib.push_i32(imm);
763 self.flush_instbuf(ib);
764 }
765
766 pub fn mov_store64_sib_imm(&mut self, base: Reg, index: Reg, imm: i32) {
768 let mut ib = InstBuf::new();
769 ib.push(0x48 | (index.hi() << 1) | base.hi());
770 ib.push(0xC7);
771 Self::modrm_sib_base_index_ib(&mut ib, 0, base, index);
772 ib.push_i32(imm);
773 self.flush_instbuf(ib);
774 }
775
776 pub fn mov_store8_sib_imm(&mut self, base: Reg, index: Reg, imm: u8) {
778 let mut ib = InstBuf::new();
779 ib.push(0x40 | (index.hi() << 1) | base.hi());
780 ib.push(0xC6);
781 Self::modrm_sib_base_index_ib(&mut ib, 0, base, index);
782 ib.push(imm);
783 self.flush_instbuf(ib);
784 }
785
786 pub fn mov_store16_sib_imm(&mut self, base: Reg, index: Reg, imm: u16) {
788 let mut ib = InstBuf::new();
789 ib.push(0x66);
790 let rex = 0x40 | (index.hi() << 1) | base.hi();
791 if rex != 0x40 {
792 ib.push(rex);
793 }
794 ib.push(0xC7);
795 Self::modrm_sib_base_index_ib(&mut ib, 0, base, index);
796 ib.push(imm as u8);
797 ib.push((imm >> 8) as u8);
798 self.flush_instbuf(ib);
799 }
800
801 pub fn add_r64_mem(&mut self, dst: Reg, base: Reg, disp: i32) {
803 let mut ib = InstBuf::new();
804 ib.push(0x48 | (dst.hi() << 2) | base.hi());
805 ib.push(0x03);
806 Self::modrm_disp_ib(&mut ib, dst.lo(), base, disp);
807 self.flush_instbuf(ib);
808 }
809
810 pub fn movzx_load8_deref(&mut self, dst: Reg, base: Reg) {
812 let mut ib = InstBuf::new();
813 ib.push(0x48 | (dst.hi() << 2) | base.hi());
814 ib.push(0x0F);
815 ib.push(0xB6);
816 if base.lo() == 5 {
817 ib.push((dst.lo() << 3) | base.lo() | 0x40);
818 ib.push(0);
819 } else if base.lo() == 4 {
820 ib.push((dst.lo() << 3) | 4);
821 ib.push(0x24);
822 } else {
823 ib.push((dst.lo() << 3) | base.lo());
824 }
825 self.flush_instbuf(ib);
826 }
827
828 pub fn cmp_byte_sib_disp32(&mut self, base: Reg, index: Reg, disp: i32, imm: u8) {
830 let mut ib = InstBuf::new();
831 let rex = 0x40 | (index.hi() << 1) | base.hi();
832 if rex != 0x40 {
833 ib.push(rex);
834 }
835 ib.push(0x80);
836 ib.push(0xBC); ib.push((index.lo() << 3) | base.lo());
838 ib.push_i32(disp);
839 ib.push(imm);
840 self.flush_instbuf(ib);
841 }
842
843 pub fn cmp_byte_deref_imm(&mut self, base: Reg, imm: u8) {
845 let mut ib = InstBuf::new();
846 if base.needs_rex() {
847 ib.push(0x41 | base.hi());
848 }
849 ib.push(0x80);
850 if base.lo() == 5 {
851 ib.push(0x78 | base.lo());
852 ib.push(0);
853 } else if base.lo() == 4 {
854 ib.push(0x38 | 4);
855 ib.push(0x24);
856 } else {
857 ib.push(0x38 | base.lo());
858 }
859 ib.push(imm);
860 self.flush_instbuf(ib);
861 }
862
863 fn alu_rr64(&mut self, op: u8, dst: Reg, src: Reg) {
866 let mut ib = InstBuf::new();
867 ib.push(0x48 | (src.hi() << 2) | dst.hi());
868 ib.push(op);
869 ib.push(0xC0 | (src.lo() << 3) | dst.lo());
870 self.flush_instbuf(ib);
871 }
872
873 fn alu_rr32(&mut self, op: u8, dst: Reg, src: Reg) {
874 let r = src.hi();
875 let b = dst.hi();
876 if r != 0 || b != 0 {
877 let mut ib = InstBuf::new();
878 ib.push(0x40 | (r << 2) | b);
879 ib.push(op);
880 ib.push(0xC0 | (src.lo() << 3) | dst.lo());
881 self.flush_instbuf(ib);
882 } else {
883 let mut ib = InstBuf::new();
884 ib.push(op);
885 ib.push(0xC0 | (src.lo() << 3) | dst.lo());
886 self.flush_instbuf(ib);
887 }
888 }
889
    /// `add dst, src` (64-bit, opcode 0x01).
    #[inline(always)]
    pub fn add_rr(&mut self, dst: Reg, src: Reg) {
        self.alu_rr64(0x01, dst, src);
    }
    /// `sub dst, src` (64-bit, opcode 0x29).
    #[inline(always)]
    pub fn sub_rr(&mut self, dst: Reg, src: Reg) {
        self.alu_rr64(0x29, dst, src);
    }
    /// `and dst, src` (64-bit, opcode 0x21).
    #[inline(always)]
    pub fn and_rr(&mut self, dst: Reg, src: Reg) {
        self.alu_rr64(0x21, dst, src);
    }
    /// `or dst, src` (64-bit, opcode 0x09).
    #[inline(always)]
    pub fn or_rr(&mut self, dst: Reg, src: Reg) {
        self.alu_rr64(0x09, dst, src);
    }
    /// `xor dst, src` (64-bit, opcode 0x31).
    #[inline(always)]
    pub fn xor_rr(&mut self, dst: Reg, src: Reg) {
        self.alu_rr64(0x31, dst, src);
    }
    /// `cmp a, b` (64-bit, opcode 0x39).
    #[inline(always)]
    pub fn cmp_rr(&mut self, a: Reg, b: Reg) {
        self.alu_rr64(0x39, a, b);
    }
    /// `test a, b` (64-bit, opcode 0x85).
    #[inline(always)]
    pub fn test_rr(&mut self, a: Reg, b: Reg) {
        self.alu_rr64(0x85, a, b);
    }
918
919 #[inline(always)]
921 pub fn test_byte_mem_disp32(&mut self, base: Reg, disp: i32, imm: u8) {
922 let mut ib = InstBuf::new();
923 if base.needs_rex() {
924 ib.push(0x41 | base.hi());
925 }
926 ib.push(0xF6); ib.push(0x80 | base.lo());
929 ib.push_i32(disp);
930 ib.push(imm);
931 self.flush_instbuf(ib);
932 }
933
    /// `add dst32, src32` (opcode 0x01).
    #[inline(always)]
    pub fn add_rr32(&mut self, dst: Reg, src: Reg) {
        self.alu_rr32(0x01, dst, src);
    }
    /// `sub dst32, src32` (opcode 0x29).
    #[inline(always)]
    pub fn sub_rr32(&mut self, dst: Reg, src: Reg) {
        self.alu_rr32(0x29, dst, src);
    }
942
943 #[inline(always)]
947 fn alu_ri64(&mut self, ext: u8, dst: Reg, imm: i32) {
948 let mut ib = InstBuf::new();
949 ib.push(0x48 | dst.hi());
950 if (-128..=127).contains(&imm) {
951 ib.push(0x83);
952 ib.push(0xC0 | (ext << 3) | dst.lo());
953 ib.push(imm as u8);
954 } else {
955 ib.push(0x81);
956 ib.push(0xC0 | (ext << 3) | dst.lo());
957 ib.push_i32(imm);
958 }
959 self.flush_instbuf(ib);
960 }
961
962 #[inline(always)]
963 fn alu_ri32(&mut self, ext: u8, dst: Reg, imm: i32) {
964 let mut ib = InstBuf::new();
965 if dst.needs_rex() {
966 ib.push(0x40 | dst.hi());
967 }
968 if (-128..=127).contains(&imm) {
969 ib.push(0x83);
970 ib.push(0xC0 | (ext << 3) | dst.lo());
971 ib.push(imm as u8);
972 } else {
973 ib.push(0x81);
974 ib.push(0xC0 | (ext << 3) | dst.lo());
975 ib.push_i32(imm);
976 }
977 self.flush_instbuf(ib);
978 }
979
    /// `add dst, imm` (64-bit, opcode extension /0).
    #[inline(always)]
    pub fn add_ri(&mut self, dst: Reg, imm: i32) {
        self.alu_ri64(0, dst, imm);
    }
    /// `sub dst, imm` (64-bit, opcode extension /5).
    #[inline(always)]
    pub fn sub_ri(&mut self, dst: Reg, imm: i32) {
        self.alu_ri64(5, dst, imm);
    }
    /// `and dst, imm` (64-bit, opcode extension /4).
    #[inline(always)]
    pub fn and_ri(&mut self, dst: Reg, imm: i32) {
        self.alu_ri64(4, dst, imm);
    }
    /// `or dst, imm` (64-bit, opcode extension /1).
    #[inline(always)]
    pub fn or_ri(&mut self, dst: Reg, imm: i32) {
        self.alu_ri64(1, dst, imm);
    }
    /// `xor dst, imm` (64-bit, opcode extension /6).
    #[inline(always)]
    pub fn xor_ri(&mut self, dst: Reg, imm: i32) {
        self.alu_ri64(6, dst, imm);
    }
    /// `cmp a, imm` (64-bit, opcode extension /7).
    #[inline(always)]
    pub fn cmp_ri(&mut self, a: Reg, imm: i32) {
        self.alu_ri64(7, a, imm);
    }

    /// `add dst32, imm` (opcode extension /0).
    #[inline(always)]
    pub fn add_ri32(&mut self, dst: Reg, imm: i32) {
        self.alu_ri32(0, dst, imm);
    }
    /// `sub dst32, imm` (opcode extension /5).
    #[inline(always)]
    pub fn sub_ri32(&mut self, dst: Reg, imm: i32) {
        self.alu_ri32(5, dst, imm);
    }
    /// `cmp a32, imm` (opcode extension /7).
    #[inline(always)]
    pub fn cmp_ri32(&mut self, a: Reg, imm: i32) {
        self.alu_ri32(7, a, imm);
    }
1017
1018 pub fn cmp_mem32_imm(&mut self, base: Reg, disp: i32, imm: i32) {
1020 let mut ib = InstBuf::new();
1021 if base.hi() != 0 {
1022 ib.push(0x41);
1023 }
1024 ib.push(0x81);
1025 Self::modrm_disp_ib(&mut ib, 7, base, disp);
1026 ib.push_i32(imm);
1027 self.flush_instbuf(ib);
1028 }
1029
1030 pub fn cmp_mem32_r(&mut self, base: Reg, disp: i32, src: Reg) {
1032 let mut ib = InstBuf::new();
1033 if base.hi() != 0 || src.hi() != 0 {
1034 ib.push(0x40 | src.hi() << 2 | base.hi());
1035 }
1036 ib.push(0x39);
1037 Self::modrm_disp_ib(&mut ib, src.lo(), base, disp);
1038 self.flush_instbuf(ib);
1039 }
1040
1041 pub fn sub_mem64_imm32(&mut self, base: Reg, disp: i32, imm: i32) {
1044 self.rex_w_b(base);
1047 self.emit(0x81);
1048 self.modrm_disp32(5, base, disp);
1049 self.emit_i32(imm);
1050 }
1051
1052 pub fn add_mem64_imm32(&mut self, base: Reg, disp: i32, imm: i32) {
1054 self.rex_w_b(base);
1056 self.emit(0x81);
1057 self.modrm_disp32(0, base, disp);
1058 self.emit_i32(imm);
1059 }
1060
1061 fn modrm_baseless(&mut self, reg: u8, idx: Reg) {
1075 let r = reg & 7;
1076 match idx.lo() {
1077 4 => {
1078 self.emit((r << 3) | 4); self.emit(0x24); }
1082 5 => {
1083 self.emit(0x40 | (r << 3) | 5);
1085 self.emit(0);
1086 }
1087 _ => {
1088 self.emit((r << 3) | idx.lo());
1089 }
1090 }
1091 }
1092
1093 pub fn movzx_load8_at_index(&mut self, dst: Reg, idx: Reg) {
1095 self.emit(0x48 | (dst.hi() << 2) | idx.hi());
1096 self.emit(0x0F);
1097 self.emit(0xB6);
1098 self.modrm_baseless(dst.lo(), idx);
1099 }
1100
1101 pub fn movzx_load16_at_index(&mut self, dst: Reg, idx: Reg) {
1103 let rex = 0x40 | (dst.hi() << 2) | idx.hi();
1104 if rex != 0x40 {
1105 self.emit(rex);
1106 }
1107 self.emit(0x0F);
1108 self.emit(0xB7);
1109 self.modrm_baseless(dst.lo(), idx);
1110 }
1111
1112 pub fn mov_load32_at_index(&mut self, dst: Reg, idx: Reg) {
1114 let rex = 0x40 | (dst.hi() << 2) | idx.hi();
1115 if rex != 0x40 {
1116 self.emit(rex);
1117 }
1118 self.emit(0x8B);
1119 self.modrm_baseless(dst.lo(), idx);
1120 }
1121
1122 pub fn mov_load64_at_index(&mut self, dst: Reg, idx: Reg) {
1124 self.emit(0x48 | (dst.hi() << 2) | idx.hi());
1125 self.emit(0x8B);
1126 self.modrm_baseless(dst.lo(), idx);
1127 }
1128
1129 pub fn mov_store8_at_index(&mut self, idx: Reg, src: Reg) {
1131 let rex = 0x40 | (src.hi() << 2) | idx.hi();
1135 if rex != 0x40 || src.lo() >= 4 {
1136 self.emit(rex);
1137 }
1138 self.emit(0x88);
1139 self.modrm_baseless(src.lo(), idx);
1140 }
1141
1142 pub fn mov_store16_at_index(&mut self, idx: Reg, src: Reg) {
1144 self.emit(0x66);
1145 let rex = 0x40 | (src.hi() << 2) | idx.hi();
1146 if rex != 0x40 {
1147 self.emit(rex);
1148 }
1149 self.emit(0x89);
1150 self.modrm_baseless(src.lo(), idx);
1151 }
1152
1153 pub fn mov_store32_at_index(&mut self, idx: Reg, src: Reg) {
1155 let rex = 0x40 | (src.hi() << 2) | idx.hi();
1156 if rex != 0x40 {
1157 self.emit(rex);
1158 }
1159 self.emit(0x89);
1160 self.modrm_baseless(src.lo(), idx);
1161 }
1162
1163 pub fn mov_store64_at_index(&mut self, idx: Reg, src: Reg) {
1165 self.emit(0x48 | (src.hi() << 2) | idx.hi());
1166 self.emit(0x89);
1167 self.modrm_baseless(src.lo(), idx);
1168 }
1169
1170 pub fn mov_store8_at_index_imm(&mut self, idx: Reg, imm: u8) {
1172 if idx.hi() != 0 {
1173 self.emit(0x40 | idx.hi());
1174 }
1175 self.emit(0xC6);
1176 self.modrm_baseless(0, idx);
1177 self.emit(imm);
1178 }
1179
1180 pub fn mov_store16_at_index_imm(&mut self, idx: Reg, imm: u16) {
1182 self.emit(0x66);
1183 if idx.hi() != 0 {
1184 self.emit(0x40 | idx.hi());
1185 }
1186 self.emit(0xC7);
1187 self.modrm_baseless(0, idx);
1188 self.emit(imm as u8);
1189 self.emit((imm >> 8) as u8);
1190 }
1191
1192 pub fn mov_store32_at_index_imm(&mut self, idx: Reg, imm: i32) {
1194 if idx.hi() != 0 {
1195 self.emit(0x40 | idx.hi());
1196 }
1197 self.emit(0xC7);
1198 self.modrm_baseless(0, idx);
1199 self.emit_i32(imm);
1200 }
1201
1202 pub fn mov_store64_at_index_imm(&mut self, idx: Reg, imm: i32) {
1204 self.emit(0x48 | idx.hi());
1205 self.emit(0xC7);
1206 self.modrm_baseless(0, idx);
1207 self.emit_i32(imm);
1208 }
1209
1210 fn emit_rip_rel_disp32(&mut self, target_va: u64, trailing_bytes: u64) {
1230 let post_inst_rip = self
1231 .jit_va_base
1232 .wrapping_add(self.write_pos as u64)
1233 .wrapping_add(4)
1234 .wrapping_add(trailing_bytes);
1235 let disp = (target_va as i64).wrapping_sub(post_inst_rip as i64);
1236 debug_assert!(
1237 disp >= i32::MIN as i64 && disp <= i32::MAX as i64,
1238 "RIP-relative target 0x{:x} out of range from base 0x{:x} + offset 0x{:x}",
1239 target_va,
1240 self.jit_va_base,
1241 self.write_pos
1242 );
1243 self.emit_i32(disp as i32);
1244 }
1245
1246 fn modrm_rip_rel(&mut self, reg: u8) {
1247 self.emit(((reg & 7) << 3) | 5);
1249 }
1250
1251 pub fn mov_load32_rip_rel(&mut self, dst: Reg, target_va: u64) {
1253 if dst.hi() != 0 {
1254 self.emit(0x40 | (dst.hi() << 2));
1255 }
1256 self.emit(0x8B);
1257 self.modrm_rip_rel(dst.lo());
1258 self.emit_rip_rel_disp32(target_va, 0);
1259 }
1260
1261 pub fn mov_load64_rip_rel(&mut self, dst: Reg, target_va: u64) {
1263 self.emit(0x48 | (dst.hi() << 2));
1264 self.emit(0x8B);
1265 self.modrm_rip_rel(dst.lo());
1266 self.emit_rip_rel_disp32(target_va, 0);
1267 }
1268
1269 pub fn mov_store32_rip_rel(&mut self, target_va: u64, src: Reg) {
1271 if src.hi() != 0 {
1272 self.emit(0x40 | (src.hi() << 2));
1273 }
1274 self.emit(0x89);
1275 self.modrm_rip_rel(src.lo());
1276 self.emit_rip_rel_disp32(target_va, 0);
1277 }
1278
1279 pub fn mov_store64_rip_rel(&mut self, target_va: u64, src: Reg) {
1281 self.emit(0x48 | (src.hi() << 2));
1282 self.emit(0x89);
1283 self.modrm_rip_rel(src.lo());
1284 self.emit_rip_rel_disp32(target_va, 0);
1285 }
1286
1287 pub fn mov_store32_rip_rel_imm(&mut self, target_va: u64, imm: i32) {
1290 self.emit(0xC7);
1291 self.modrm_rip_rel(0);
1292 self.emit_rip_rel_disp32(target_va, 4);
1293 self.emit_i32(imm);
1294 }
1295
1296 pub fn cmp_mem32_rip_rel_r(&mut self, target_va: u64, src: Reg) {
1298 if src.hi() != 0 {
1299 self.emit(0x40 | (src.hi() << 2));
1300 }
1301 self.emit(0x39);
1302 self.modrm_rip_rel(src.lo());
1303 self.emit_rip_rel_disp32(target_va, 0);
1304 }
1305
1306 pub fn add_r64_mem_rip_rel(&mut self, dst: Reg, target_va: u64) {
1308 self.emit(0x48 | (dst.hi() << 2));
1309 self.emit(0x03);
1310 self.modrm_rip_rel(dst.lo());
1311 self.emit_rip_rel_disp32(target_va, 0);
1312 }
1313
1314 pub fn sub_r64_imm32_patchable(&mut self, dst: Reg, imm: i32) {
1320 self.emit(0x48 | dst.hi());
1323 self.emit(0x81);
1324 self.emit(0xE8 | dst.lo()); self.emit_i32(imm);
1326 }
1327
1328 pub fn imul_rr(&mut self, dst: Reg, src: Reg) {
1332 let mut ib = InstBuf::new();
1333 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1334 ib.push(0x0F);
1335 ib.push(0xAF);
1336 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1337 self.flush_instbuf(ib);
1338 }
1339
1340 pub fn imul_rr32(&mut self, dst: Reg, src: Reg) {
1342 let mut ib = InstBuf::new();
1343 let r = dst.hi();
1344 let b = src.hi();
1345 if r != 0 || b != 0 {
1346 ib.push(0x40 | (r << 2) | b);
1347 }
1348 ib.push(0x0F);
1349 ib.push(0xAF);
1350 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1351 self.flush_instbuf(ib);
1352 }
1353
1354 pub fn imul_rri(&mut self, dst: Reg, src: Reg, imm: i32) {
1356 let mut ib = InstBuf::new();
1357 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1358 ib.push(0x69);
1359 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1360 ib.push_i32(imm);
1361 self.flush_instbuf(ib);
1362 }
1363
1364 pub fn imul_rri32(&mut self, dst: Reg, src: Reg, imm: i32) {
1366 let mut ib = InstBuf::new();
1367 let r = dst.hi();
1368 let b = src.hi();
1369 if r != 0 || b != 0 {
1370 ib.push(0x40 | (r << 2) | b);
1371 }
1372 ib.push(0x69);
1373 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1374 ib.push_i32(imm);
1375 self.flush_instbuf(ib);
1376 }
1377
1378 pub fn mul_rdx_rax(&mut self, src: Reg) {
1382 self.emit3(0x48 | src.hi(), 0xF7, 0xE0 | src.lo());
1383 }
1384
1385 pub fn imul_rdx_rax(&mut self, src: Reg) {
1387 self.emit3(0x48 | src.hi(), 0xF7, 0xE8 | src.lo());
1388 }
1389
1390 pub fn div64(&mut self, src: Reg) {
1394 self.emit3(0x48 | src.hi(), 0xF7, 0xF0 | src.lo());
1395 }
1396
1397 pub fn idiv64(&mut self, src: Reg) {
1399 self.emit3(0x48 | src.hi(), 0xF7, 0xF8 | src.lo());
1400 }
1401
1402 pub fn div32(&mut self, src: Reg) {
1404 if src.needs_rex() {
1405 self.emit3(0x41, 0xF7, 0xF0 | src.lo());
1406 } else {
1407 let mut ib = InstBuf::new();
1408 ib.push(0xF7);
1409 ib.push(0xF0 | src.lo());
1410 self.flush_instbuf(ib);
1411 }
1412 }
1413
1414 pub fn idiv32(&mut self, src: Reg) {
1416 if src.needs_rex() {
1417 self.emit3(0x41, 0xF7, 0xF8 | src.lo());
1418 } else {
1419 let mut ib = InstBuf::new();
1420 ib.push(0xF7);
1421 ib.push(0xF8 | src.lo());
1422 self.flush_instbuf(ib);
1423 }
1424 }
1425
    /// `cqo` (REX.W + 0x99): sign-extends RAX into RDX:RAX.
    pub fn cqo(&mut self) {
        self.emit(0x48);
        self.emit(0x99);
    }
1431
    /// `cdq` (0x99): sign-extends EAX into EDX:EAX.
    pub fn cdq(&mut self) {
        self.emit(0x99);
    }
1436
1437 pub fn inc64(&mut self, dst: Reg) {
1441 self.emit3(0x48 | dst.hi(), 0xFF, 0xC0 | dst.lo());
1442 }
1443
1444 pub fn dec64(&mut self, dst: Reg) {
1446 self.emit3(0x48 | dst.hi(), 0xFF, 0xC8 | dst.lo());
1447 }
1448
1449 pub fn neg64(&mut self, dst: Reg) {
1453 self.emit3(0x48 | dst.hi(), 0xF7, 0xD8 | dst.lo());
1454 }
1455
1456 pub fn neg32(&mut self, dst: Reg) {
1457 if dst.needs_rex() {
1458 self.emit3(0x41, 0xF7, 0xD8 | dst.lo());
1459 } else {
1460 let mut ib = InstBuf::new();
1461 ib.push(0xF7);
1462 ib.push(0xD8 | dst.lo());
1463 self.flush_instbuf(ib);
1464 }
1465 }
1466
1467 pub fn not64(&mut self, dst: Reg) {
1469 self.emit3(0x48 | dst.hi(), 0xF7, 0xD0 | dst.lo());
1470 }
1471
1472 fn shift_ri64(&mut self, ext: u8, dst: Reg, imm: u8) {
1475 let mut ib = InstBuf::new();
1476 ib.push(0x48 | dst.hi());
1477 ib.push(0xC1);
1478 ib.push(0xC0 | (ext << 3) | dst.lo());
1479 ib.push(imm);
1480 self.flush_instbuf(ib);
1481 }
1482
1483 pub fn shift_cl64(&mut self, ext: u8, dst: Reg) {
1484 let mut ib = InstBuf::new();
1485 ib.push(0x48 | dst.hi());
1486 ib.push(0xD3);
1487 ib.push(0xC0 | (ext << 3) | dst.lo());
1488 self.flush_instbuf(ib);
1489 }
1490
1491 fn shift_ri32(&mut self, ext: u8, dst: Reg, imm: u8) {
1492 let mut ib = InstBuf::new();
1493 if dst.needs_rex() {
1494 ib.push(0x40 | dst.hi());
1495 }
1496 ib.push(0xC1);
1497 ib.push(0xC0 | (ext << 3) | dst.lo());
1498 ib.push(imm);
1499 self.flush_instbuf(ib);
1500 }
1501
1502 pub fn shift_cl32(&mut self, ext: u8, dst: Reg) {
1503 let mut ib = InstBuf::new();
1504 if dst.needs_rex() {
1505 ib.push(0x40 | dst.hi());
1506 }
1507 ib.push(0xD3);
1508 ib.push(0xC0 | (ext << 3) | dst.lo());
1509 self.flush_instbuf(ib);
1510 }
1511
1512 pub fn shl_ri64(&mut self, dst: Reg, imm: u8) {
1513 self.shift_ri64(4, dst, imm);
1514 }
1515 pub fn shr_ri64(&mut self, dst: Reg, imm: u8) {
1516 self.shift_ri64(5, dst, imm);
1517 }
1518 pub fn sar_ri64(&mut self, dst: Reg, imm: u8) {
1519 self.shift_ri64(7, dst, imm);
1520 }
1521 pub fn shl_cl64(&mut self, dst: Reg) {
1522 self.shift_cl64(4, dst);
1523 }
1524 pub fn shr_cl64(&mut self, dst: Reg) {
1525 self.shift_cl64(5, dst);
1526 }
1527 pub fn sar_cl64(&mut self, dst: Reg) {
1528 self.shift_cl64(7, dst);
1529 }
1530 pub fn rol_cl64(&mut self, dst: Reg) {
1531 self.shift_cl64(0, dst);
1532 }
1533 pub fn ror_cl64(&mut self, dst: Reg) {
1534 self.shift_cl64(1, dst);
1535 }
1536 pub fn rol_ri64(&mut self, dst: Reg, imm: u8) {
1537 self.shift_ri64(0, dst, imm);
1538 }
1539 pub fn ror_ri64(&mut self, dst: Reg, imm: u8) {
1540 self.shift_ri64(1, dst, imm);
1541 }
1542
1543 pub fn shl_ri32(&mut self, dst: Reg, imm: u8) {
1544 self.shift_ri32(4, dst, imm);
1545 }
1546 pub fn shr_ri32(&mut self, dst: Reg, imm: u8) {
1547 self.shift_ri32(5, dst, imm);
1548 }
1549 pub fn sar_ri32(&mut self, dst: Reg, imm: u8) {
1550 self.shift_ri32(7, dst, imm);
1551 }
1552 pub fn shl_cl32(&mut self, dst: Reg) {
1553 self.shift_cl32(4, dst);
1554 }
1555 pub fn shr_cl32(&mut self, dst: Reg) {
1556 self.shift_cl32(5, dst);
1557 }
1558 pub fn sar_cl32(&mut self, dst: Reg) {
1559 self.shift_cl32(7, dst);
1560 }
1561 pub fn rol_cl32(&mut self, dst: Reg) {
1562 self.shift_cl32(0, dst);
1563 }
1564 pub fn ror_cl32(&mut self, dst: Reg) {
1565 self.shift_cl32(1, dst);
1566 }
1567 pub fn rol_ri32(&mut self, dst: Reg, imm: u8) {
1568 self.shift_ri32(0, dst, imm);
1569 }
1570 pub fn ror_ri32(&mut self, dst: Reg, imm: u8) {
1571 self.shift_ri32(1, dst, imm);
1572 }
1573
1574 #[inline(always)]
1578 pub fn movsxd(&mut self, dst: Reg, src: Reg) {
1579 let mut ib = InstBuf::new();
1580 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1581 ib.push(0x63);
1582 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1583 self.flush_instbuf(ib);
1584 }
1585
1586 pub fn movsx_8_64(&mut self, dst: Reg, src: Reg) {
1588 let mut ib = InstBuf::new();
1589 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1590 ib.push(0x0F);
1591 ib.push(0xBE);
1592 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1593 self.flush_instbuf(ib);
1594 }
1595
1596 pub fn movsx_16_64(&mut self, dst: Reg, src: Reg) {
1598 let mut ib = InstBuf::new();
1599 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1600 ib.push(0x0F);
1601 ib.push(0xBF);
1602 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1603 self.flush_instbuf(ib);
1604 }
1605
1606 pub fn movzx_8_64(&mut self, dst: Reg, src: Reg) {
1608 let mut ib = InstBuf::new();
1609 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1610 ib.push(0x0F);
1611 ib.push(0xB6);
1612 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1613 self.flush_instbuf(ib);
1614 }
1615
1616 pub fn movzx_16_64(&mut self, dst: Reg, src: Reg) {
1618 let mut ib = InstBuf::new();
1619 let r = dst.hi();
1620 let b = src.hi();
1621 if r != 0 || b != 0 {
1622 ib.push(0x40 | (r << 2) | b);
1623 }
1624 ib.push(0x0F);
1625 ib.push(0xB7);
1626 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1627 self.flush_instbuf(ib);
1628 }
1629
1630 #[inline(always)]
1632 pub fn movzx_32_64(&mut self, dst: Reg, src: Reg) {
1633 let mut ib = InstBuf::new();
1634 let r = src.hi();
1635 let b = dst.hi();
1636 if r != 0 || b != 0 {
1637 ib.push(0x40 | (r << 2) | b);
1638 }
1639 ib.push(0x89);
1640 ib.push(0xC0 | (src.lo() << 3) | dst.lo());
1641 self.flush_instbuf(ib);
1642 }
1643
1644 #[inline(always)]
1648 pub fn setcc(&mut self, cc: Cc, dst: Reg) {
1649 let mut ib = InstBuf::new();
1650 if dst.needs_rex() || matches!(dst, Reg::RSP | Reg::RBP | Reg::RSI | Reg::RDI) {
1651 ib.push(0x40 | dst.hi());
1652 }
1653 ib.push(0x0F);
1654 ib.push(0x90 + cc as u8);
1655 ib.push(0xC0 | dst.lo());
1656 self.flush_instbuf(ib);
1657 }
1658
1659 #[inline(always)]
1661 pub fn cmovcc(&mut self, cc: Cc, dst: Reg, src: Reg) {
1662 let mut ib = InstBuf::new();
1663 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1664 ib.push(0x0F);
1665 ib.push(0x40 + cc as u8);
1666 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1667 self.flush_instbuf(ib);
1668 }
1669
1670 pub fn popcnt64(&mut self, dst: Reg, src: Reg) {
1674 let mut ib = InstBuf::new();
1675 ib.push(0xF3);
1676 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1677 ib.push(0x0F);
1678 ib.push(0xB8);
1679 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1680 self.flush_instbuf(ib);
1681 }
1682
1683 pub fn lzcnt64(&mut self, dst: Reg, src: Reg) {
1685 let mut ib = InstBuf::new();
1686 ib.push(0xF3);
1687 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1688 ib.push(0x0F);
1689 ib.push(0xBD);
1690 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1691 self.flush_instbuf(ib);
1692 }
1693
1694 pub fn tzcnt64(&mut self, dst: Reg, src: Reg) {
1696 let mut ib = InstBuf::new();
1697 ib.push(0xF3);
1698 ib.push(0x48 | (dst.hi() << 2) | src.hi());
1699 ib.push(0x0F);
1700 ib.push(0xBC);
1701 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1702 self.flush_instbuf(ib);
1703 }
1704
1705 pub fn popcnt32(&mut self, dst: Reg, src: Reg) {
1707 let mut ib = InstBuf::new();
1708 ib.push(0xF3);
1709 let rex = (dst.hi() << 2) | src.hi();
1710 if rex != 0 {
1711 ib.push(0x40 | rex);
1712 }
1713 ib.push(0x0F);
1714 ib.push(0xB8);
1715 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1716 self.flush_instbuf(ib);
1717 }
1718
1719 pub fn lzcnt32(&mut self, dst: Reg, src: Reg) {
1721 let mut ib = InstBuf::new();
1722 ib.push(0xF3);
1723 let rex = (dst.hi() << 2) | src.hi();
1724 if rex != 0 {
1725 ib.push(0x40 | rex);
1726 }
1727 ib.push(0x0F);
1728 ib.push(0xBD);
1729 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1730 self.flush_instbuf(ib);
1731 }
1732
1733 pub fn tzcnt32(&mut self, dst: Reg, src: Reg) {
1735 let mut ib = InstBuf::new();
1736 ib.push(0xF3);
1737 let rex = (dst.hi() << 2) | src.hi();
1738 if rex != 0 {
1739 ib.push(0x40 | rex);
1740 }
1741 ib.push(0x0F);
1742 ib.push(0xBC);
1743 ib.push(0xC0 | (dst.lo() << 3) | src.lo());
1744 self.flush_instbuf(ib);
1745 }
1746
1747 pub fn bswap64(&mut self, dst: Reg) {
1749 self.emit3(0x48 | dst.hi(), 0x0F, 0xC8 + dst.lo());
1750 }
1751
1752 #[inline(always)]
1755 pub fn push(&mut self, reg: Reg) {
1756 self.rex_opt_b(reg);
1757 self.emit(0x50 + reg.lo());
1758 }
1759
1760 #[inline(always)]
1761 pub fn pop(&mut self, reg: Reg) {
1762 self.rex_opt_b(reg);
1763 self.emit(0x58 + reg.lo());
1764 }
1765
1766 pub fn push_imm32(&mut self, imm: i32) {
1768 self.emit(0x68);
1769 self.emit_i32(imm);
1770 }
1771
1772 #[inline(always)]
1776 pub fn jmp_label(&mut self, label: Label) {
1777 let bound = self.labels[label.0 as usize];
1778 if bound != LABEL_UNBOUND {
1779 let target = (bound - 1) as isize; let rel = target - (self.write_pos as isize + 2);
1782 if rel >= i8::MIN as isize && rel <= i8::MAX as isize {
1783 self.emit(0xEB);
1784 self.emit(rel as u8);
1785 return;
1786 }
1787 }
1788 self.emit(0xE9);
1790 self.emit_label_fixup(label);
1791 }
1792
1793 #[inline(always)]
1795 pub fn jcc_label(&mut self, cc: Cc, label: Label) {
1796 let bound = self.labels[label.0 as usize];
1797 if bound != LABEL_UNBOUND {
1798 let target = (bound - 1) as isize; let rel = target - (self.write_pos as isize + 2);
1801 if rel >= i8::MIN as isize && rel <= i8::MAX as isize {
1802 self.emit(0x70 + cc as u8);
1803 self.emit(rel as u8);
1804 return;
1805 }
1806 }
1807 self.emit(0x0F);
1809 self.emit(0x80 + cc as u8);
1810 self.emit_label_fixup(label);
1811 }
1812
1813 pub fn jmp_reg(&mut self, reg: Reg) {
1815 self.rex_opt_b(reg);
1816 self.emit(0xFF);
1817 self.emit(0xE0 | reg.lo()); }
1819
1820 pub fn call_reg(&mut self, reg: Reg) {
1822 self.rex_opt_b(reg);
1823 self.emit(0xFF);
1824 self.emit(0xD0 | reg.lo()); }
1826
1827 pub fn call_label(&mut self, label: Label) {
1829 self.emit(0xE8);
1830 self.emit_label_fixup(label);
1831 }
1832
1833 pub fn ret(&mut self) {
1835 self.emit(0xC3);
1836 }
1837
1838 pub fn lea(&mut self, dst: Reg, base: Reg, disp: i32) {
1842 let mut ib = InstBuf::new();
1843 ib.push(0x48 | (dst.hi() << 2) | base.hi());
1844 ib.push(0x8D);
1845 Self::modrm_disp_ib(&mut ib, dst.lo(), base, disp);
1846 self.flush_instbuf(ib);
1847 }
1848
1849 #[inline(always)]
1851 pub fn lea_32(&mut self, dst: Reg, base: Reg, disp: i32) {
1852 let mut ib = InstBuf::new();
1853 let r = dst.hi();
1854 let b = base.hi();
1855 if r != 0 || b != 0 {
1856 ib.push(0x40 | (r << 2) | b);
1857 }
1858 ib.push(0x8D);
1859 Self::modrm_disp_ib(&mut ib, dst.lo(), base, disp);
1860 self.flush_instbuf(ib);
1861 }
1862
1863 #[inline(always)]
1866 pub fn lea_sib_scaled_32(&mut self, dst: Reg, base: Reg, index: Reg, scale_log2: u8) {
1867 debug_assert!(scale_log2 <= 3);
1868 let mut ib = InstBuf::new();
1869 let rex = 0x40 | (dst.hi() << 2) | (index.hi() << 1) | base.hi();
1870 if rex != 0x40 {
1871 ib.push(rex);
1872 }
1873 ib.push(0x8D);
1874 let scale_bits = scale_log2 << 6;
1875 if base.lo() == 5 {
1876 ib.push(0x44 | (dst.lo() << 3));
1877 ib.push(scale_bits | (index.lo() << 3) | base.lo());
1878 ib.push(0x00);
1879 } else {
1880 ib.push((dst.lo() << 3) | 0x04);
1881 ib.push(scale_bits | (index.lo() << 3) | base.lo());
1882 }
1883 self.flush_instbuf(ib);
1884 }
1885
1886 pub fn ud2(&mut self) {
1890 self.emit(0x0F);
1891 self.emit(0x0B);
1892 }
1893
1894 pub fn nop(&mut self) {
1896 self.emit(0x90);
1897 }
1898
1899 pub fn int3(&mut self) {
1901 self.emit(0xCC);
1902 }
1903
1904 pub fn label_offset(&self, label: Label) -> Option<usize> {
1908 let off = self.labels[label.0 as usize];
1909 if off == LABEL_UNBOUND {
1910 None
1911 } else {
1912 Some((off - 1) as usize)
1913 }
1914 }
1915
1916 pub fn sync_len(&mut self) {
1918 let CodeBuf::Vec(code) = &mut self.code_buf;
1919 unsafe {
1921 code.set_len(self.write_pos);
1922 }
1923 }
1924
1925 fn resolve_fixups(&mut self) {
1927 for fixup in &self.fixups {
1928 let stored = self.labels[fixup.label.0 as usize];
1929 assert!(stored != LABEL_UNBOUND, "unbound label {:?}", fixup.label);
1931 let target = stored - 1; let rel = (target as i64) - (fixup.offset as i64 + 4);
1933 let rel32 = rel as i32;
1934 unsafe {
1936 core::ptr::copy_nonoverlapping(
1937 rel32.to_le_bytes().as_ptr(),
1938 self.buf.add(fixup.offset),
1939 4,
1940 );
1941 }
1942 }
1943 }
1944
1945 pub fn finalize(&mut self) -> Vec<u8> {
1947 self.resolve_fixups();
1948 let CodeBuf::Vec(code) = &mut self.code_buf;
1949 unsafe {
1951 code.set_len(self.write_pos);
1952 }
1953 core::mem::take(code)
1954 }
1955
1956 #[cfg(test)]
1958 pub fn code_bytes(&mut self) -> &[u8] {
1959 self.sync_len();
1960 let CodeBuf::Vec(v) = &self.code_buf;
1961 v.as_slice()
1962 }
1963}
1964
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `build` against a fresh `Assembler` and returns a copy of the
    /// bytes it has emitted.
    fn assemble(build: impl FnOnce(&mut Assembler)) -> Vec<u8> {
        let mut asm = Assembler::new();
        build(&mut asm);
        asm.code_bytes().to_vec()
    }

    /// Loading zero is expected to come out as the two bytes `31 C0`.
    #[test]
    fn test_mov_ri64_zero() {
        let bytes = assemble(|asm| {
            asm.mov_ri64(Reg::RAX, 0);
        });
        assert_eq!(bytes, [0x31_u8, 0xC0]);
    }

    /// A small immediate is expected as `B8` followed by a 32-bit
    /// little-endian value.
    #[test]
    fn test_mov_ri64_small() {
        let bytes = assemble(|asm| {
            asm.mov_ri64(Reg::RAX, 42);
        });
        assert_eq!(bytes, [0xB8_u8, 0x2A, 0x00, 0x00, 0x00]);
    }

    /// A forward jump over a single NOP must be patched with displacement 1.
    #[test]
    fn test_label_resolution() {
        let mut asm = Assembler::new();
        let target = asm.new_label();
        asm.jmp_label(target);
        asm.nop();
        asm.bind_label(target);
        let code = asm.finalize();

        // The label was unbound at emission time, so the rel32 form is used.
        assert_eq!(code[0], 0xE9);
        let mut disp = [0u8; 4];
        disp.copy_from_slice(&code[1..5]);
        assert_eq!(i32::from_le_bytes(disp), 1);
    }

    /// 32-bit load through RDX into RAX: `8B 02`, no REX prefix.
    #[test]
    fn test_mov_load32_at_rdx() {
        let bytes = assemble(|asm| {
            asm.mov_load32_at_index(Reg::RAX, Reg::RDX);
        });
        assert_eq!(bytes, [0x8B_u8, 0x02]);
    }

    /// 64-bit load through R12 into R8: REX `4D`, and a SIB byte (`24`)
    /// follows the ModRM byte.
    #[test]
    fn test_mov_load64_at_r12_into_r8() {
        let bytes = assemble(|asm| {
            asm.mov_load64_at_index(Reg::R8, Reg::R12);
        });
        assert_eq!(bytes, [0x4D_u8, 0x8B, 0x04, 0x24]);
    }

    /// RIP-relative 32-bit load: target is 0x40 past the VA base and the
    /// instruction is 6 bytes long, so the displacement is 0x3A.
    #[test]
    fn test_mov_load32_rip_rel() {
        let bytes = assemble(|asm| {
            asm.set_jit_va_base(0x1_0000_0000);
            asm.mov_load32_rip_rel(Reg::RAX, 0x1_0000_0040);
        });
        assert_eq!(bytes, [0x8B_u8, 0x05, 0x3A, 0x00, 0x00, 0x00]);
    }

    /// RIP-relative 64-bit store from R15: target is 0x80 past the VA base
    /// and the instruction is 7 bytes long, so the displacement is 0x79.
    #[test]
    fn test_mov_store64_rip_rel_r15() {
        let bytes = assemble(|asm| {
            asm.set_jit_va_base(0x1_0000_0000);
            asm.mov_store64_rip_rel(0x1_0000_0080, Reg::R15);
        });
        assert_eq!(bytes, [0x4C_u8, 0x89, 0x3D, 0x79, 0x00, 0x00, 0x00]);
    }

    /// 32-bit store of RAX through RDX: `89 02`, no REX prefix.
    #[test]
    fn test_mov_store32_at_rdx() {
        let bytes = assemble(|asm| {
            asm.mov_store32_at_index(Reg::RDX, Reg::RAX);
        });
        assert_eq!(bytes, [0x89_u8, 0x02]);
    }

    /// 32-bit store of R11 through RDX: the extended source needs REX `44`.
    #[test]
    fn test_mov_store32_at_rdx_from_r11() {
        let bytes = assemble(|asm| {
            asm.mov_store32_at_index(Reg::RDX, Reg::R11);
        });
        assert_eq!(bytes, [0x44_u8, 0x89, 0x1A]);
    }

    /// Byte store from RSI: a bare REX (`40`) is expected so SIL is selected.
    #[test]
    fn test_mov_store8_at_rdx_from_sil() {
        let bytes = assemble(|asm| {
            asm.mov_store8_at_index(Reg::RDX, Reg::RSI);
        });
        assert_eq!(bytes, [0x40_u8, 0x88, 0x32]);
    }

    /// Byte store from RDI: a bare REX (`40`) is expected so DIL is selected.
    #[test]
    fn test_mov_store8_at_rdx_from_dil() {
        let bytes = assemble(|asm| {
            asm.mov_store8_at_index(Reg::RDX, Reg::RDI);
        });
        assert_eq!(bytes, [0x40_u8, 0x88, 0x3A]);
    }

    /// Byte store from RBP: a bare REX (`40`) is expected so BPL is selected.
    #[test]
    fn test_mov_store8_at_rdx_from_bpl() {
        let bytes = assemble(|asm| {
            asm.mov_store8_at_index(Reg::RDX, Reg::RBP);
        });
        assert_eq!(bytes, [0x40_u8, 0x88, 0x2A]);
    }

    /// Byte store from RSP: a bare REX (`40`) is expected so SPL is selected.
    #[test]
    fn test_mov_store8_at_rdx_from_spl() {
        let bytes = assemble(|asm| {
            asm.mov_store8_at_index(Reg::RDX, Reg::RSP);
        });
        assert_eq!(bytes, [0x40_u8, 0x88, 0x22]);
    }

    /// Byte store from RAX: AL needs no REX prefix.
    #[test]
    fn test_mov_store8_at_rdx_from_al_no_rex() {
        let bytes = assemble(|asm| {
            asm.mov_store8_at_index(Reg::RDX, Reg::RAX);
        });
        assert_eq!(bytes, [0x88_u8, 0x02]);
    }

    /// RIP-relative store of an immediate: target is 0x100 past the VA base
    /// and the instruction is 10 bytes long, so the displacement is 0xF6; the
    /// immediate 0x123 follows in little-endian order.
    #[test]
    fn test_mov_store32_rip_rel_imm() {
        let bytes = assemble(|asm| {
            asm.set_jit_va_base(0x1_0000_0000);
            asm.mov_store32_rip_rel_imm(0x1_0000_0100, 0x123);
        });
        assert_eq!(
            bytes,
            [0xC7_u8, 0x05, 0xF6, 0x00, 0x00, 0x00, 0x23, 0x01, 0x00, 0x00]
        );
    }

    /// RIP-relative 64-bit add into RAX: target is 0x20 past the VA base and
    /// the instruction is 7 bytes long, so the displacement is 0x19.
    #[test]
    fn test_add_r64_mem_rip_rel() {
        let bytes = assemble(|asm| {
            asm.set_jit_va_base(0x1_0000_0000);
            asm.add_r64_mem_rip_rel(Reg::RAX, 0x1_0000_0020);
        });
        assert_eq!(bytes, [0x48_u8, 0x03, 0x05, 0x19, 0x00, 0x00, 0x00]);
    }

    /// The patchable subtract must always be 7 bytes: a 3-byte
    /// REX/opcode/ModRM header followed by the 32-bit little-endian immediate.
    #[test]
    fn test_sub_r64_imm32_patchable_r15() {
        let bytes = assemble(|asm| {
            asm.sub_r64_imm32_patchable(Reg::R15, 0xCAFE_F00D_u32 as i32);
        });
        assert_eq!(bytes.len(), 7, "patchable sub must be 7 bytes");
        let (header, imm) = bytes.split_at(3);
        assert_eq!(header, &[0x49, 0x81, 0xEF]);
        assert_eq!(imm, &[0x0D, 0xF0, 0xFE, 0xCA]);
    }

    /// Push and pop of R15 each carry the `41` prefix ahead of the opcode.
    #[test]
    fn test_push_pop_r15() {
        let bytes = assemble(|asm| {
            asm.push(Reg::R15);
            asm.pop(Reg::R15);
        });
        assert_eq!(bytes, [0x41_u8, 0x57, 0x41, 0x5F]);
    }
}