1use alloc::format;
12use alloc::string::String;
13use alloc::vec::Vec;
14
// Trace-output helper used by the simulator's optional tracing.
// With the `std` feature enabled it simply forwards its arguments to
// `std::eprintln!`.
#[cfg(feature = "std")]
macro_rules! trace_eprintln {
    ($($t:tt)*) => { std::eprintln!($($t)*) };
}
// Without `std` nothing is printed: the arguments are still routed through
// `format_args!` (so they are type-checked and count as used) and the result
// is discarded.
#[cfg(not(feature = "std"))]
macro_rules! trace_eprintln {
    ($($t:tt)*) => { let _ = format_args!($($t)*); };
}
26
/// A count of execution units per unit kind.
///
/// The same type is used both for the budget still available in the current
/// simulated cycle (starting from [`ExecUnits::RESET`]) and for the
/// requirement of a single instruction (e.g. [`ExecUnits::LOAD`]).
#[derive(Clone, Copy, Default, Debug)]
struct ExecUnits {
    alu: u8,
    load: u8,
    store: u8,
    mul: u8,
    div: u8,
}

impl ExecUnits {
    /// Budget every cycle starts with.
    const RESET: ExecUnits = ExecUnits {
        alu: 4,
        load: 4,
        store: 4,
        mul: 1,
        div: 1,
    };
    /// Requirement: one ALU.
    const ALU: ExecUnits = ExecUnits {
        alu: 1,
        load: 0,
        store: 0,
        mul: 0,
        div: 0,
    };
    /// Requirement: one ALU plus one load unit.
    const LOAD: ExecUnits = ExecUnits {
        alu: 1,
        load: 1,
        store: 0,
        mul: 0,
        div: 0,
    };
    /// Requirement: one ALU plus one store unit.
    const STORE: ExecUnits = ExecUnits {
        alu: 1,
        load: 0,
        store: 1,
        mul: 0,
        div: 0,
    };
    /// Requirement: one ALU plus the multiplier.
    const MUL: ExecUnits = ExecUnits {
        alu: 1,
        load: 0,
        store: 0,
        mul: 1,
        div: 0,
    };
    /// Requirement: one ALU plus the divider.
    const DIV: ExecUnits = ExecUnits {
        alu: 1,
        load: 0,
        store: 0,
        mul: 0,
        div: 1,
    };
    /// Requirement: no execution unit at all.
    const NONE: ExecUnits = ExecUnits {
        alu: 0,
        load: 0,
        store: 0,
        mul: 0,
        div: 0,
    };

    /// Returns `true` when `self` (the available budget) has at least as many
    /// units of every kind as `req` asks for.
    fn can_satisfy(self, req: ExecUnits) -> bool {
        self.alu >= req.alu
            && self.load >= req.load
            && self.store >= req.store
            && self.mul >= req.mul
            && self.div >= req.div
    }

    /// Returns the budget left after taking `req` out of `self`.
    ///
    /// Callers are expected to check [`ExecUnits::can_satisfy`] first. The
    /// subtraction saturates at zero so that an unchecked call can neither
    /// panic (debug builds) nor wrap around to a huge bogus budget (release
    /// builds).
    fn sub(self, req: ExecUnits) -> ExecUnits {
        ExecUnits {
            alu: self.alu.saturating_sub(req.alu),
            load: self.load.saturating_sub(req.load),
            store: self.store.saturating_sub(req.store),
            mul: self.mul.saturating_sub(req.mul),
            div: self.div.saturating_sub(req.div),
        }
    }

    /// Collapses the requirement into a single byte identifying the most
    /// specialised unit in use: 5 = div, 4 = mul, 3 = store, 2 = load,
    /// 1 = ALU only, 0 = none. The numbering matches the `EU_*` constants
    /// defined further down in this file.
    fn _to_eu_byte(self) -> u8 {
        if self.div > 0 {
            5
        } else if self.mul > 0 {
            4
        } else if self.store > 0 {
            3
        } else if self.load > 0 {
            2
        } else if self.alu > 0 {
            1
        } else {
            0
        }
    }
}
120
/// Lifecycle state of a reorder-buffer entry.
#[derive(Clone, Copy, PartialEq)]
enum RobState {
    /// Decoded and queued; not yet dispatched to an execution unit.
    Wait,
    /// Dispatched; `cycles_left` is counted down on every cycle advance.
    Exe,
    /// Execution finished; entries that depend on it may now dispatch.
    Fin,
}
127
/// One slot of the simulated reorder buffer.
#[derive(Clone, Copy)]
struct RobEntry {
    /// Current lifecycle state of the entry.
    state: RobState,
    /// Remaining execution cycles; decremented on each cycle advance while
    /// the entry is in `RobState::Exe`.
    cycles_left: u32,
    /// Indices (into the ROB vector) of earlier entries this one waits on.
    /// Only the first `dep_count` elements are meaningful.
    deps: [u8; 4],
    /// Number of valid elements in `deps` (the simulators record at most 4).
    dep_count: u8,
    /// Registers written by the instruction; later instructions reading one
    /// of them depend on this entry until it is finished.
    dest_regs: RegSet,
    /// Execution units the entry needs in the cycle it is dispatched.
    exec_units: ExecUnits,
}
137
/// Mutable state of one pipeline simulation run.
struct SimState {
    /// Next position to decode, or `None` once decoding has stopped.
    /// `gas_sim_traced` stores a byte offset into the code here, while
    /// `gas_sim_decoded` stores an index into the pre-decoded instruction list.
    ip: Option<usize>,
    /// Number of simulated cycles elapsed so far.
    cycles: u32,
    /// Decode slots left in the current cycle (reset to 4 on every advance).
    decode_slots: u8,
    /// Dispatch slots left in the current cycle (reset to 5 on every advance).
    dispatch_slots: u8,
    /// Execution units left in the current cycle (reset to `ExecUnits::RESET`).
    exec_units: ExecUnits,
    /// Reorder buffer; entries are appended at decode time and never removed.
    rob: Vec<RobEntry>,
}
146
/// A tiny fixed-capacity set of register numbers (at most three).
///
/// Only the first `len` slots of `regs` are considered part of the set.
#[derive(Clone, Copy, Default, Debug)]
struct RegSet {
    regs: [u8; 3],
    len: u8,
}

impl RegSet {
    /// The set containing no registers.
    const EMPTY: Self = Self {
        regs: [0; 3],
        len: 0,
    };

    /// Builds the set `{r}`.
    fn one(r: u8) -> Self {
        let mut set = Self::EMPTY;
        set.regs[0] = r;
        set.len = 1;
        set
    }

    /// Builds the set `{a, b}` (no de-duplication is performed).
    fn two(a: u8, b: u8) -> Self {
        let mut set = Self::EMPTY;
        set.regs[0] = a;
        set.regs[1] = b;
        set.len = 2;
        set
    }

    /// Returns `true` when `r` is one of the first `len` stored registers.
    #[inline]
    fn contains(&self, r: u8) -> bool {
        // `take` (rather than slicing) keeps this panic-free for any `len`.
        self.regs
            .iter()
            .take(self.len as usize)
            .any(|&member| member == r)
    }

    /// Iterates over the stored registers in insertion order.
    #[inline]
    fn iter(&self) -> impl Iterator<Item = &u8> {
        let used = self.len as usize;
        self.regs[..used].iter()
    }
}
184
/// Static cost description of a single instruction, as produced by
/// `instruction_cost` / `instruction_cost_fast`.
struct InstrCost {
    /// Execution latency; becomes the ROB entry's initial `cycles_left`.
    cycles: u32,
    /// Decode slots consumed in the cycle the instruction is decoded.
    decode_slots: u8,
    /// Execution units required at dispatch.
    exec_units: ExecUnits,
    /// Registers written by the instruction.
    dest_regs: RegSet,
    /// Registers read by the instruction (used to find dependencies).
    src_regs: RegSet,
    /// When set, decoding stops after this instruction.
    is_terminator: bool,
    /// When set, the simulators do not allocate a ROB entry for the instruction.
    is_move_reg: bool,
}
194
195fn dst_overlaps_src(dst: u8, srcs: &RegSet) -> bool {
196 srcs.contains(dst)
197}
198
/// Cycle cost of a branch to `target`.
///
/// The cost is 1 when `target` is inside both `code` and `bitmask`, the
/// bitmask marks it with `1`, and the opcode there is `0` or `2`; every other
/// case (including an out-of-range target) costs 20.
fn branch_cost(code: &[u8], bitmask: &[u8], target: usize) -> u32 {
    match (code.get(target), bitmask.get(target)) {
        (Some(&opcode), Some(&1)) if opcode == 0 || opcode == 2 => 1,
        _ => 20,
    }
}
208
/// Reads a 4-bit register field from the byte at `pc + byte_offset`.
///
/// The byte is shifted right by `shift` and masked to its low nibble. When
/// the byte lies outside `code`, `0` is returned instead.
fn extract_reg(code: &[u8], pc: usize, byte_offset: usize, shift: u8) -> u8 {
    code.get(pc + byte_offset)
        .map_or(0, |&byte| (byte >> shift) & 0x0F)
}
220
221fn reg_a(code: &[u8], pc: usize) -> u8 {
223 extract_reg(code, pc, 1, 0)
224}
225fn reg_b(code: &[u8], pc: usize) -> u8 {
227 extract_reg(code, pc, 1, 4)
228}
229fn reg_d(code: &[u8], pc: usize) -> u8 {
231 extract_reg(code, pc, 2, 0)
232}
233
/// Number of bytes between `pc` and the next position the bitmask marks
/// with `1`, not counting `pc` itself.
///
/// Positions `pc + 1 ..= pc + 25` are examined in order; positions past the
/// end of `bitmask` count as marked. The result is the zero-based offset of
/// the first marked position, or `24` when none of the 25 positions is marked.
pub fn skip_distance(bitmask: &[u8], pc: usize) -> usize {
    (0..25)
        .find(|&offset| bitmask.get(pc + 1 + offset).copied().unwrap_or(1) == 1)
        .unwrap_or(24)
}
245
246fn extract_branch_target(code: &[u8], bitmask: &[u8], pc: usize) -> usize {
248 let skip = skip_distance(bitmask, pc);
249 let instr_len = 1 + skip;
253 if instr_len >= 3 && pc + instr_len <= code.len() {
254 let raw = crate::args::decode_args(
258 code,
259 pc,
260 skip,
261 crate::instruction::InstructionCategory::OneRegImmOffset,
262 );
263 if let crate::args::Args::RegImmOffset { offset, .. } = raw {
264 return offset as usize;
265 }
266 }
267 pc }
269
270fn extract_two_reg_branch_target(code: &[u8], bitmask: &[u8], pc: usize) -> usize {
272 let skip = skip_distance(bitmask, pc);
273 let raw = crate::args::decode_args(
274 code,
275 pc,
276 skip,
277 crate::instruction::InstructionCategory::TwoRegOneOffset,
278 );
279 if let crate::args::Args::TwoRegOffset { offset, .. } = raw {
280 return offset as usize;
281 }
282 pc
283}
284
/// Looks up the static cost of the instruction at byte offset `pc`.
///
/// The opcode is read from `code[pc]` (treated as `0` when `pc` is out of
/// range) and the three register fields are extracted up front with
/// `reg_a` / `reg_b` / `reg_d`, whether or not the opcode uses them.
/// Each match arm then fixes the latency, decode-slot count, execution units
/// and destination/source register sets for a group of opcodes.
///
/// Opcodes without an explicit arm get a default of one cycle, one decode
/// slot, no execution unit and no registers.
///
/// NOTE(review): opcode 3 has no arm here (it takes the default), whereas
/// `fast_cost_from_raw` gives it a dedicated terminator entry — confirm which
/// is intended.
fn instruction_cost(code: &[u8], bitmask: &[u8], pc: usize) -> InstrCost {
    let opcode = if pc < code.len() { code[pc] } else { 0 };
    let ra = reg_a(code, pc);
    let rb = reg_b(code, pc);
    let rd = reg_d(code, pc);

    // `mk` builds a regular (non-terminating) entry.
    let mk = |cy: u32, dc: u8, eu: ExecUnits, dst: RegSet, src: RegSet| -> InstrCost {
        InstrCost {
            cycles: cy,
            decode_slots: dc,
            exec_units: eu,
            dest_regs: dst,
            src_regs: src,
            is_terminator: false,
            is_move_reg: false,
        }
    };
    // `mkt` builds an entry that terminates the block.
    let mkt = |cy: u32, dc: u8, eu: ExecUnits, dst: RegSet, src: RegSet| -> InstrCost {
        InstrCost {
            cycles: cy,
            decode_slots: dc,
            exec_units: eu,
            dest_regs: dst,
            src_regs: src,
            is_terminator: true,
            is_move_reg: false,
        }
    };
    let e = RegSet::EMPTY;
    let r1 = RegSet::one;
    let r2 = RegSet::two;

    match opcode {
        // Terminators with fixed cost and no register operands.
        0 => mkt(2, 1, ExecUnits::NONE, e, e),
        1 => mkt(2, 1, ExecUnits::NONE, e, e),
        2 => mkt(40, 1, ExecUnits::NONE, e, e),
        10 => mkt(100, 4, ExecUnits::ALU, e, e),
        40 => mkt(15, 1, ExecUnits::ALU, e, e),
        80 => {
            // The destination register is taken from the decoded arguments;
            // the raw nibble `ra` is only a fallback.
            let skip = skip_distance(bitmask, pc);
            let raw = crate::args::decode_args(
                code,
                pc,
                skip,
                crate::instruction::InstructionCategory::OneRegImmOffset,
            );
            let r = if let crate::args::Args::RegImmOffset { ra: r, .. } = raw {
                r as u8
            } else {
                ra
            };
            mkt(15, 1, ExecUnits::ALU, r1(r), e)
        }
        50 => mkt(22, 1, ExecUnits::ALU, e, e),
        180 => mkt(22, 1, ExecUnits::ALU, r1(ra), r1(rb)),

        // Entries using the LOAD unit.
        52..=58 => mk(25, 1, ExecUnits::LOAD, r1(ra), r1(rb)),
        124..=130 => mk(25, 1, ExecUnits::LOAD, r1(ra), r1(rb)),

        // Entries using the STORE unit (no destination register).
        59..=62 => mk(25, 1, ExecUnits::STORE, e, r2(ra, rb)),
        120..=123 => mk(25, 1, ExecUnits::STORE, e, r2(ra, rb)),
        30..=33 => mk(25, 1, ExecUnits::STORE, e, e),
        70..=73 => mk(25, 1, ExecUnits::STORE, e, r1(ra)),

        // Entries that need no execution unit.
        51 => mk(1, 1, ExecUnits::NONE, r1(ra), e),
        20 => mk(1, 2, ExecUnits::NONE, r1(ra), e),
        // The only entry flagged `is_move_reg`: zero latency, and the
        // simulators do not allocate a ROB slot for it.
        100 => InstrCost {
            cycles: 0,
            decode_slots: 1,
            exec_units: ExecUnits::NONE,
            dest_regs: r1(ra),
            src_regs: r1(rb),
            is_terminator: false,
            is_move_reg: true,
        },

        101 => mk(2, 1, ExecUnits::NONE, e, e),

        // Branches: latency comes from `branch_cost` of the decoded target.
        81..=90 => {
            let target = extract_branch_target(code, bitmask, pc);
            let bc = branch_cost(code, bitmask, target);
            mkt(bc, 1, ExecUnits::ALU, e, r1(ra))
        }

        170..=175 => {
            let target = extract_two_reg_branch_target(code, bitmask, pc);
            let bc = branch_cost(code, bitmask, target);
            mkt(bc, 1, ExecUnits::ALU, e, r2(ra, rb))
        }

        // ALU entries whose decode cost drops by one slot when the
        // destination register is also a source register.
        200 | 201 | 210 | 211 | 212 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                1
            } else {
                2
            };
            mk(1, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        190 | 191 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }

        132 | 133 | 134 | 149 | 151 | 152 | 153 | 158 | 110 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 1 } else { 2 };
            mk(1, dc, ExecUnits::ALU, r1(ra), r1(rb))
        }
        131 | 138 | 139 | 140 | 160 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 2 } else { 3 };
            mk(2, dc, ExecUnits::ALU, r1(ra), r1(rb))
        }

        // ALU entries with a fixed single decode slot.
        102 | 103 | 104 | 105 | 108 | 109 => mk(1, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        106 | 107 => mk(2, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        111 => mk(1, 1, ExecUnits::ALU, r1(ra), r1(rb)),

        // Here only `rb == ra` (not `rd`) lowers the decode cost.
        207 | 208 | 209 | 220 | 222 => {
            let dc = if rb == ra { 2 } else { 3 };
            mk(1, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        197 | 198 | 199 | 221 | 223 => {
            let dc = if rb == ra { 3 } else { 4 };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        155 | 156 | 157 | 159 => mk(1, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        144 | 145 | 146 | 161 => mk(2, 4, ExecUnits::ALU, r1(ra), r1(rb)),

        216 | 217 => mk(3, 3, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        136 | 137 | 142 | 143 => mk(3, 3, ExecUnits::ALU, r1(ra), r1(rb)),

        218 | 219 => mk(2, 2, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        147 | 148 => mk(2, 3, ExecUnits::ALU, r1(ra), r1(rb)),

        227..=230 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(3, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        224 | 225 => mk(2, 3, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        226 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }

        154 => mk(2, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        141 => mk(3, 4, ExecUnits::ALU, r1(ra), r1(rb)),

        // Entries using the MUL unit.
        202 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                1
            } else {
                2
            };
            mk(3, dc, ExecUnits::MUL, r1(ra), r2(rb, rd))
        }
        150 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 1 } else { 2 };
            mk(3, dc, ExecUnits::MUL, r1(ra), r1(rb))
        }
        192 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(4, dc, ExecUnits::MUL, r1(ra), r2(rb, rd))
        }
        135 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 2 } else { 3 };
            mk(4, dc, ExecUnits::MUL, r1(ra), r1(rb))
        }

        213 | 214 => mk(4, 4, ExecUnits::MUL, r1(ra), r2(rb, rd)),
        215 => mk(6, 4, ExecUnits::MUL, r1(ra), r2(rb, rd)),

        // Entries using the DIV unit.
        193 | 194 | 195 | 196 | 203 | 204 | 205 | 206 => {
            mk(60, 4, ExecUnits::DIV, r1(ra), r2(rb, rd))
        }

        // Default for every opcode not listed above.
        _ => mk(1, 1, ExecUnits::NONE, e, e),
    }
}
531
532fn all_deps_finished(rob: &[RobEntry], entry: &RobEntry) -> bool {
535 for i in 0..entry.dep_count as usize {
536 let idx = entry.deps[i] as usize;
537 if idx < rob.len() && rob[idx].state != RobState::Fin {
538 return false;
539 }
540 }
541 true
542}
543
544fn find_ready_entry(rob: &[RobEntry], exec_units: ExecUnits) -> Option<usize> {
545 for (i, entry) in rob.iter().enumerate() {
546 if entry.state == RobState::Wait
547 && all_deps_finished(rob, entry)
548 && exec_units.can_satisfy(entry.exec_units)
549 {
550 return Some(i);
551 }
552 }
553 None
554}
555
556fn rob_all_finished(rob: &[RobEntry]) -> bool {
557 rob.iter().all(|e| e.state == RobState::Fin)
558}
559
560fn gas_sim_traced(code: &[u8], bitmask: &[u8], start_pc: usize, trace: bool) -> u32 {
563 let mut s = SimState {
564 ip: Some(start_pc),
565 cycles: 0,
566 decode_slots: 4,
567 dispatch_slots: 5,
568 exec_units: ExecUnits::RESET,
569 rob: Vec::with_capacity(32),
570 };
571
572 for iter in 0..100_000 {
573 if s.ip.is_some() && s.decode_slots > 0 && s.rob.len() < 32 {
575 let pc = s.ip.unwrap();
576 let cost = instruction_cost(code, bitmask, pc);
577 let mut deps = [0xFF_u8; 4];
578 let mut dep_count = 0u8;
579 for (i, e) in s.rob.iter().enumerate() {
580 if e.state != RobState::Fin
581 && e.dest_regs.iter().any(|dr| cost.src_regs.contains(*dr))
582 && dep_count < 4
583 {
584 deps[dep_count as usize] = i as u8;
585 dep_count += 1;
586 }
587 }
588 s.decode_slots = s.decode_slots.saturating_sub(cost.decode_slots);
589 let next_ip = if cost.is_terminator {
590 None
591 } else {
592 let skip = skip_distance(bitmask, pc);
593 let npc = pc + 1 + skip;
594 if npc < code.len() { Some(npc) } else { None }
595 };
596
597 if trace {
598 let op = crate::instruction::Opcode::from_byte(code[pc])
599 .map(|o| format!("{:?}", o))
600 .unwrap_or("?".into());
601 trace_eprintln!(
602 " [{}] DECODE pc={} {} cy={} dec={} rob_idx={} deps={:?} move={} term={} slots_left={}",
603 iter,
604 pc,
605 op,
606 cost.cycles,
607 cost.decode_slots,
608 s.rob.len(),
609 &deps[..dep_count as usize],
610 cost.is_move_reg,
611 cost.is_terminator,
612 s.decode_slots
613 );
614 }
615 if cost.is_move_reg {
616 s.ip = next_ip;
617 } else {
618 s.rob.push(RobEntry {
619 state: RobState::Wait,
620 cycles_left: cost.cycles,
621 deps,
622 dep_count,
623 dest_regs: cost.dest_regs,
624 exec_units: cost.exec_units,
625 });
626 s.ip = next_ip;
627 }
628 continue;
629 }
630
631 if s.dispatch_slots > 0
633 && let Some(idx) = find_ready_entry(&s.rob, s.exec_units)
634 {
635 let eu = s.rob[idx].exec_units;
636
637 if trace {
638 trace_eprintln!(
639 " [{}] DISPATCH rob[{}] cy={} dispatch_left={}",
640 iter,
641 idx,
642 s.rob[idx].cycles_left,
643 s.dispatch_slots - 1
644 );
645 }
646 s.rob[idx].state = RobState::Exe;
647 s.dispatch_slots -= 1;
648 s.exec_units = s.exec_units.sub(eu);
649 continue;
650 }
651
652 if s.ip.is_none() && rob_all_finished(&s.rob) {
654 if trace {
655 trace_eprintln!(" [{}] DONE cycles={}", iter, s.cycles);
656 }
657 break;
658 }
659
660 if trace {
663 let states: Vec<String> = s
664 .rob
665 .iter()
666 .enumerate()
667 .map(|(i, e)| {
668 let st = match e.state {
669 RobState::Wait => "W",
670 RobState::Exe => "E",
671 RobState::Fin => "F",
672 };
673 format!(
674 "{}:{}{}",
675 i,
676 st,
677 if e.state == RobState::Exe {
678 format!("({})", e.cycles_left)
679 } else {
680 String::new()
681 }
682 )
683 })
684 .collect();
685 trace_eprintln!(
686 " [{}] ADVANCE cycle {} → {} rob=[{}]",
687 iter,
688 s.cycles,
689 s.cycles + 1,
690 states.join(", ")
691 );
692 }
693 for entry in s.rob.iter_mut() {
694 if entry.state == RobState::Exe {
695 if entry.cycles_left <= 1 {
696 entry.state = RobState::Fin;
697 entry.cycles_left = 0;
698 } else {
699 entry.cycles_left -= 1;
700 }
701 }
702 }
703 s.cycles += 1;
704 s.decode_slots = 4;
705 s.dispatch_slots = 5;
706 s.exec_units = ExecUnits::RESET;
707 }
708
709 s.cycles
710}
711
712fn gas_sim(code: &[u8], bitmask: &[u8], start_pc: usize) -> u32 {
713 gas_sim_traced(code, bitmask, start_pc, false)
714}
715
716pub fn gas_cost_for_block(code: &[u8], bitmask: &[u8], start_pc: usize) -> u64 {
719 let cycles = gas_sim(code, bitmask, start_pc);
720 if cycles > 3 { (cycles - 3) as u64 } else { 1 }
721}
722
723#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
729pub fn gas_cost_for_block_decoded(
730 instrs: &[crate::predecoded::PreDecodedInst],
731 code: &[u8],
732 bitmask: &[u8],
733) -> u64 {
734 let cycles = gas_sim_decoded(instrs, code, bitmask);
735 if cycles > 3 { (cycles - 3) as u64 } else { 1 }
736}
737
/// Pipeline simulation over pre-decoded instructions; returns the simulated
/// cycle count.
///
/// Follows the same decode / dispatch / stop / advance steps as
/// `gas_sim_traced`, without tracing, but walks `instrs` by index starting at
/// 0 (so `SimState::ip` is an instruction index here) and takes register
/// operands from each instruction's decoded `args`. `code` and `bitmask` are
/// only forwarded to `instruction_cost_fast`. The loop is capped at 100 000
/// iterations.
///
/// NOTE(review): for `Args::TwoReg` the tuple is `(ra, 0xFF, rd)`, yet the
/// two-operand arms of `instruction_cost_fast` read their source from the
/// second slot (`rb`), so the source register is seen as 0xFF. The byte-level
/// `instruction_cost` instead takes destination and source from the low/high
/// nibble of the operand byte — confirm the two paths are meant to agree.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn gas_sim_decoded(
    instrs: &[crate::predecoded::PreDecodedInst],
    code: &[u8],
    bitmask: &[u8],
) -> u32 {
    use crate::args::Args;

    let mut s = SimState {
        ip: Some(0),
        cycles: 0,
        decode_slots: 4,
        dispatch_slots: 5,
        exec_units: ExecUnits::RESET,
        rob: Vec::with_capacity(32),
    };

    for _ in 0..100_000 {
        // Decode: needs an instruction left, a decode slot and ROB space.
        if let Some(idx) = s.ip
            && idx < instrs.len()
            && s.decode_slots > 0
            && s.rob.len() < 32
        {
            let instr = &instrs[idx];
            let opcode_byte = instr.opcode as u8;

            // Flatten the decoded arguments into (ra, rb, rd); 0xFF marks an
            // operand the argument shape does not provide.
            let (ra, rb, rd) = match instr.args {
                Args::ThreeReg { ra, rb, rd } => (ra as u8, rb as u8, rd as u8),
                Args::TwoReg { rd: d, ra: a } => (a as u8, 0xFF, d as u8),
                Args::TwoRegImm { ra, rb, .. }
                | Args::TwoRegOffset { ra, rb, .. }
                | Args::TwoRegTwoImm { ra, rb, .. } => (ra as u8, rb as u8, 0xFF),
                Args::RegImm { ra, .. }
                | Args::RegExtImm { ra, .. }
                | Args::RegTwoImm { ra, .. }
                | Args::RegImmOffset { ra, .. } => (ra as u8, 0xFF, 0xFF),
                _ => (0xFF, 0xFF, 0xFF),
            };

            let cost = instruction_cost_fast(opcode_byte, ra, rb, rd, instr, code, bitmask);

            // Depend on every unfinished entry that writes one of our sources
            // (only the first four such entries are recorded).
            let mut deps = [0xFF_u8; 4];
            let mut dep_count = 0u8;
            for (i, e) in s.rob.iter().enumerate() {
                if e.state != RobState::Fin
                    && e.dest_regs.iter().any(|dr| cost.src_regs.contains(*dr))
                    && dep_count < 4
                {
                    deps[dep_count as usize] = i as u8;
                    dep_count += 1;
                }
            }

            s.decode_slots = s.decode_slots.saturating_sub(cost.decode_slots);
            // A terminator stops decoding; otherwise move to the next index
            // (running past `instrs` is caught by the `idx < instrs.len()` test).
            let next_ip = if cost.is_terminator {
                None
            } else {
                Some(idx + 1)
            };

            // Register moves consume decode slots but no ROB entry.
            if cost.is_move_reg {
                s.ip = next_ip;
            } else {
                s.rob.push(RobEntry {
                    state: RobState::Wait,
                    cycles_left: cost.cycles,
                    deps,
                    dep_count,
                    dest_regs: cost.dest_regs,
                    exec_units: cost.exec_units,
                });
                s.ip = next_ip;
            }
            continue;
        }

        // Dispatch the oldest ready entry, if a slot and units are available.
        if s.dispatch_slots > 0
            && let Some(idx) = find_ready_entry(&s.rob, s.exec_units)
        {
            let eu = s.rob[idx].exec_units;
            s.rob[idx].state = RobState::Exe;
            s.dispatch_slots -= 1;
            s.exec_units = s.exec_units.sub(eu);
            continue;
        }

        // Done once nothing is left to decode and every entry has finished.
        if s.ip.is_none_or(|i| i >= instrs.len()) && rob_all_finished(&s.rob) {
            break;
        }

        // Advance one cycle: count down executing entries, reset budgets.
        for entry in s.rob.iter_mut() {
            if entry.state == RobState::Exe {
                if entry.cycles_left <= 1 {
                    entry.state = RobState::Fin;
                    entry.cycles_left = 0;
                } else {
                    entry.cycles_left -= 1;
                }
            }
        }
        s.cycles += 1;
        s.decode_slots = 4;
        s.dispatch_slots = 5;
        s.exec_units = ExecUnits::RESET;
    }

    s.cycles
}
847
/// Cost lookup for a pre-decoded instruction.
///
/// Uses the same per-opcode table as `instruction_cost`, but takes the
/// opcode and register operands as arguments instead of re-reading them from
/// the code bytes. Two arms differ in how they obtain their inputs:
///
/// * opcode 80 uses the passed-in `ra` directly rather than decoding it;
/// * branch arms (81..=90, 170..=175) take the target from `instr.args`,
///   falling back to `instr.pc` when the argument shape does not match.
///
/// `code` and `bitmask` are only used to price branch targets via
/// `branch_cost`.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn instruction_cost_fast(
    opcode: u8,
    ra: u8,
    rb: u8,
    rd: u8,
    instr: &crate::predecoded::PreDecodedInst,
    code: &[u8],
    bitmask: &[u8],
) -> InstrCost {
    // `mk` builds a regular (non-terminating) entry.
    let mk = |cy: u32, dc: u8, eu: ExecUnits, dst: RegSet, src: RegSet| -> InstrCost {
        InstrCost {
            cycles: cy,
            decode_slots: dc,
            exec_units: eu,
            dest_regs: dst,
            src_regs: src,
            is_terminator: false,
            is_move_reg: false,
        }
    };
    // `mkt` builds an entry that terminates the block.
    let mkt = |cy: u32, dc: u8, eu: ExecUnits, dst: RegSet, src: RegSet| -> InstrCost {
        InstrCost {
            cycles: cy,
            decode_slots: dc,
            exec_units: eu,
            dest_regs: dst,
            src_regs: src,
            is_terminator: true,
            is_move_reg: false,
        }
    };
    let e = RegSet::EMPTY;
    let r1 = RegSet::one;
    let r2 = RegSet::two;

    match opcode {
        // Terminators with fixed cost.
        0 => mkt(2, 1, ExecUnits::NONE, e, e),
        1 => mkt(2, 1, ExecUnits::NONE, e, e),
        2 => mkt(40, 1, ExecUnits::NONE, e, e),
        10 => mkt(100, 4, ExecUnits::ALU, e, e),
        40 => mkt(15, 1, ExecUnits::ALU, e, e),
        80 => mkt(15, 1, ExecUnits::ALU, r1(ra), e),
        50 => mkt(22, 1, ExecUnits::ALU, e, e),
        180 => mkt(22, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        // Entries using the LOAD unit.
        52..=58 => mk(25, 1, ExecUnits::LOAD, r1(ra), r1(rb)),
        124..=130 => mk(25, 1, ExecUnits::LOAD, r1(ra), r1(rb)),
        // Entries using the STORE unit (no destination register).
        59..=62 => mk(25, 1, ExecUnits::STORE, e, r2(ra, rb)),
        120..=123 => mk(25, 1, ExecUnits::STORE, e, r2(ra, rb)),
        30..=33 => mk(25, 1, ExecUnits::STORE, e, e),
        70..=73 => mk(25, 1, ExecUnits::STORE, e, r1(ra)),
        // Entries that need no execution unit.
        51 => mk(1, 1, ExecUnits::NONE, r1(ra), e),
        20 => mk(1, 2, ExecUnits::NONE, r1(ra), e),
        // The only entry flagged `is_move_reg`: zero latency, no ROB slot.
        100 => InstrCost {
            cycles: 0,
            decode_slots: 1,
            exec_units: ExecUnits::NONE,
            dest_regs: r1(ra),
            src_regs: r1(rb),
            is_terminator: false,
            is_move_reg: true,
        },
        101 => mk(2, 1, ExecUnits::NONE, e, e),
        // Branches: latency comes from `branch_cost` of the decoded target.
        81..=90 => {
            let target = match instr.args {
                crate::args::Args::RegImmOffset { offset, .. } => offset as usize,
                _ => instr.pc as usize,
            };
            let bc = branch_cost(code, bitmask, target);
            mkt(bc, 1, ExecUnits::ALU, e, r1(ra))
        }
        170..=175 => {
            let target = match instr.args {
                crate::args::Args::TwoRegOffset { offset, .. } => offset as usize,
                _ => instr.pc as usize,
            };
            let bc = branch_cost(code, bitmask, target);
            mkt(bc, 1, ExecUnits::ALU, e, r2(ra, rb))
        }
        // ALU entries whose decode cost drops by one slot when the
        // destination register is also a source register.
        200 | 201 | 210 | 211 | 212 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                1
            } else {
                2
            };
            mk(1, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        190 | 191 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        132 | 133 | 134 | 149 | 151 | 152 | 153 | 158 | 110 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 1 } else { 2 };
            mk(1, dc, ExecUnits::ALU, r1(ra), r1(rb))
        }
        131 | 138 | 139 | 140 | 160 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 2 } else { 3 };
            mk(2, dc, ExecUnits::ALU, r1(ra), r1(rb))
        }
        // ALU entries with a fixed single decode slot.
        102 | 103 | 104 | 105 | 108 | 109 => mk(1, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        106 | 107 => mk(2, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        111 => mk(1, 1, ExecUnits::ALU, r1(ra), r1(rb)),
        // Here only `rb == ra` (not `rd`) lowers the decode cost.
        207 | 208 | 209 | 220 | 222 => {
            let dc = if rb == ra { 2 } else { 3 };
            mk(1, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        197 | 198 | 199 | 221 | 223 => {
            let dc = if rb == ra { 3 } else { 4 };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        155 | 156 | 157 | 159 => mk(1, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        144 | 145 | 146 | 161 => mk(2, 4, ExecUnits::ALU, r1(ra), r1(rb)),
        216 | 217 => mk(3, 3, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        136 | 137 | 142 | 143 => mk(3, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        218 | 219 => mk(2, 2, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        147 | 148 => mk(2, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        227..=230 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(3, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        224 | 225 => mk(2, 3, ExecUnits::ALU, r1(ra), r2(rb, rd)),
        226 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(2, dc, ExecUnits::ALU, r1(ra), r2(rb, rd))
        }
        154 => mk(2, 3, ExecUnits::ALU, r1(ra), r1(rb)),
        141 => mk(3, 4, ExecUnits::ALU, r1(ra), r1(rb)),
        // Entries using the MUL unit.
        202 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                1
            } else {
                2
            };
            mk(3, dc, ExecUnits::MUL, r1(ra), r2(rb, rd))
        }
        150 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 1 } else { 2 };
            mk(3, dc, ExecUnits::MUL, r1(ra), r1(rb))
        }
        192 => {
            let dc = if dst_overlaps_src(ra, &r2(rb, rd)) {
                2
            } else {
                3
            };
            mk(4, dc, ExecUnits::MUL, r1(ra), r2(rb, rd))
        }
        135 => {
            let dc = if dst_overlaps_src(ra, &r1(rb)) { 2 } else { 3 };
            mk(4, dc, ExecUnits::MUL, r1(ra), r1(rb))
        }
        213 | 214 => mk(4, 4, ExecUnits::MUL, r1(ra), r2(rb, rd)),
        215 => mk(6, 4, ExecUnits::MUL, r1(ra), r2(rb, rd)),
        // Entries using the DIV unit.
        193 | 194 | 195 | 196 | 203 | 204 | 205 | 206 => {
            mk(60, 4, ExecUnits::DIV, r1(ra), r2(rb, rd))
        }
        // Default for every opcode not listed above.
        _ => mk(1, 1, ExecUnits::NONE, e, e),
    }
}
1021
/// Compact per-instruction cost record produced by `fast_cost_from_raw`.
///
/// Carries the same information as `InstrCost`, with register sets encoded
/// as bit masks and the execution unit as a single `EU_*` code.
#[derive(Clone, Copy, Debug, Default)]
pub struct FastCost {
    /// Execution latency in cycles.
    pub cycles: u8,
    /// Decode slots consumed by the instruction.
    pub decode_slots: u8,
    /// Execution unit code (one of the `EU_*` constants).
    pub exec_unit: u8,
    /// Bit mask of source registers, built with `reg_bit`.
    pub src_mask: u16,
    /// Bit mask of destination registers, built with `reg_bit`.
    pub dst_mask: u16,
    /// Set for instructions that end the block.
    pub is_terminator: bool,
    /// Set for the register-move entry (opcode 100 in `fast_cost_from_raw`).
    pub is_move_reg: bool,
}
1046
// Execution-unit codes stored in `FastCost::exec_unit`. The numbering is the
// same as the one returned by `ExecUnits::_to_eu_byte`.
/// No execution unit required.
const EU_NONE: u8 = 0;
/// ALU.
const EU_ALU: u8 = 1;
/// Load unit.
const EU_LOAD: u8 = 2;
/// Store unit.
const EU_STORE: u8 = 3;
/// Multiplier.
const EU_MUL: u8 = 4;
/// Divider.
const EU_DIV: u8 = 5;
1053
/// Returns the single-bit mask for register `r`.
///
/// Register numbers above 12 are clamped to 12, so every such value maps to
/// bit 12.
///
/// NOTE(review): this means a 0xFF "no register" marker, if one is ever
/// passed in, would alias with register 12 — confirm callers rely on that.
#[inline(always)]
fn reg_bit(r: u8) -> u16 {
    let bit_index = if r > 12 { 12 } else { r };
    1u16 << bit_index
}
1059
1060fn extract_branch_target_raw(code: &[u8], bitmask: &[u8], pc: usize) -> usize {
1063 let skip = {
1064 let mut s = 0;
1065 for j in 0..25 {
1066 let idx = pc + 1 + j;
1067 if idx >= bitmask.len() || bitmask[idx] == 1 {
1068 s = j;
1069 break;
1070 }
1071 }
1072 s
1073 };
1074 let opcode = code[pc];
1075 let cat = crate::instruction::Opcode::from_byte(opcode)
1077 .map(|o| o.category())
1078 .unwrap_or(crate::instruction::InstructionCategory::NoArgs);
1079 let args = crate::args::decode_args(code, pc, skip, cat);
1080 match args {
1081 crate::args::Args::RegImmOffset { offset, .. } => offset as usize,
1082 crate::args::Args::TwoRegOffset { offset, .. } => offset as usize,
1083 crate::args::Args::Offset { offset } => offset as usize,
1084 _ => pc,
1085 }
1086}
1087
/// Default latency, in cycles, for memory instructions. It equals the
/// literal `25` that `instruction_cost` uses for its load and store entries.
/// Presumably the value callers pass as `mem_cycles` to `fast_cost_from_raw`
/// when they have no override — confirm against call sites.
pub const DEFAULT_MEM_CYCLES: u8 = 25;
1092
1093#[allow(clippy::too_many_arguments)]
1094pub fn fast_cost_from_raw(
1095 opcode_byte: u8,
1096 ra: u8,
1097 rb: u8,
1098 rd: u8,
1099 pc: u32,
1100 code: &[u8],
1101 bitmask: &[u8],
1102 mem_cycles: u8,
1103) -> FastCost {
1104 let r1 = |r: u8| reg_bit(r);
1105 let r2 = |a: u8, b: u8| reg_bit(a) | reg_bit(b);
1106 let dst_src_overlap = |dst: u8, s: u16| (reg_bit(dst) & s) != 0;
1107
1108 let opcode = opcode_byte;
1109 match opcode {
1110 0 => FastCost {
1112 cycles: 2,
1113 decode_slots: 1,
1114 exec_unit: EU_NONE,
1115 src_mask: 0,
1116 dst_mask: 0,
1117 is_terminator: true,
1118 is_move_reg: false,
1119 },
1120 1 => FastCost {
1121 cycles: 2,
1122 decode_slots: 1,
1123 exec_unit: EU_NONE,
1124 src_mask: 0,
1125 dst_mask: 0,
1126 is_terminator: true,
1127 is_move_reg: false,
1128 },
1129 2 => FastCost {
1130 cycles: 40,
1131 decode_slots: 1,
1132 exec_unit: EU_NONE,
1133 src_mask: 0,
1134 dst_mask: 0,
1135 is_terminator: true,
1136 is_move_reg: false,
1137 },
1138 3 => FastCost {
1142 cycles: 100,
1143 decode_slots: 4,
1144 exec_unit: EU_ALU,
1145 src_mask: 0,
1146 dst_mask: 0,
1147 is_terminator: true,
1148 is_move_reg: false,
1149 },
1150 10 => FastCost {
1151 cycles: 100,
1152 decode_slots: 4,
1153 exec_unit: EU_ALU,
1154 src_mask: 0,
1155 dst_mask: 0,
1156 is_terminator: true,
1157 is_move_reg: false,
1158 },
1159
1160 40 => FastCost {
1162 cycles: 15,
1163 decode_slots: 1,
1164 exec_unit: EU_ALU,
1165 src_mask: 0,
1166 dst_mask: 0,
1167 is_terminator: true,
1168 is_move_reg: false,
1169 },
1170 80 => FastCost {
1171 cycles: 15,
1172 decode_slots: 1,
1173 exec_unit: EU_ALU,
1174 src_mask: 0,
1175 dst_mask: r1(ra),
1176 is_terminator: true,
1177 is_move_reg: false,
1178 },
1179 50 => FastCost {
1180 cycles: 22,
1181 decode_slots: 1,
1182 exec_unit: EU_ALU,
1183 src_mask: 0,
1184 dst_mask: 0,
1185 is_terminator: true,
1186 is_move_reg: false,
1187 },
1188 180 => FastCost {
1189 cycles: 22,
1190 decode_slots: 1,
1191 exec_unit: EU_ALU,
1192 src_mask: r1(rb),
1193 dst_mask: r1(ra),
1194 is_terminator: true,
1195 is_move_reg: false,
1196 },
1197
1198 52..=58 => FastCost {
1200 cycles: mem_cycles,
1201 decode_slots: 1,
1202 exec_unit: EU_LOAD,
1203 src_mask: r1(rb),
1204 dst_mask: r1(ra),
1205 is_terminator: false,
1206 is_move_reg: false,
1207 },
1208 124..=130 => FastCost {
1209 cycles: mem_cycles,
1210 decode_slots: 1,
1211 exec_unit: EU_LOAD,
1212 src_mask: r1(rb),
1213 dst_mask: r1(ra),
1214 is_terminator: false,
1215 is_move_reg: false,
1216 },
1217
1218 59..=62 => FastCost {
1220 cycles: mem_cycles,
1221 decode_slots: 1,
1222 exec_unit: EU_STORE,
1223 src_mask: r2(ra, rb),
1224 dst_mask: 0,
1225 is_terminator: false,
1226 is_move_reg: false,
1227 },
1228 120..=123 => FastCost {
1229 cycles: mem_cycles,
1230 decode_slots: 1,
1231 exec_unit: EU_STORE,
1232 src_mask: r2(ra, rb),
1233 dst_mask: 0,
1234 is_terminator: false,
1235 is_move_reg: false,
1236 },
1237 30..=33 => FastCost {
1238 cycles: mem_cycles,
1239 decode_slots: 1,
1240 exec_unit: EU_STORE,
1241 src_mask: 0,
1242 dst_mask: 0,
1243 is_terminator: false,
1244 is_move_reg: false,
1245 },
1246 70..=73 => FastCost {
1247 cycles: mem_cycles,
1248 decode_slots: 1,
1249 exec_unit: EU_STORE,
1250 src_mask: r1(ra),
1251 dst_mask: 0,
1252 is_terminator: false,
1253 is_move_reg: false,
1254 },
1255
1256 51 => FastCost {
1258 cycles: 1,
1259 decode_slots: 1,
1260 exec_unit: EU_NONE,
1261 src_mask: 0,
1262 dst_mask: r1(ra),
1263 is_terminator: false,
1264 is_move_reg: false,
1265 },
1266 20 => FastCost {
1267 cycles: 1,
1268 decode_slots: 2,
1269 exec_unit: EU_NONE,
1270 src_mask: 0,
1271 dst_mask: r1(ra),
1272 is_terminator: false,
1273 is_move_reg: false,
1274 },
1275
1276 100 => FastCost {
1278 cycles: 0,
1279 decode_slots: 1,
1280 exec_unit: EU_NONE,
1281 src_mask: r1(rb),
1282 dst_mask: r1(ra),
1283 is_terminator: false,
1284 is_move_reg: true,
1285 },
1286
1287 101 => FastCost {
1288 cycles: 2,
1289 decode_slots: 1,
1290 exec_unit: EU_NONE,
1291 src_mask: 0,
1292 dst_mask: 0,
1293 is_terminator: false,
1294 is_move_reg: false,
1295 },
1296
1297 81..=90 => {
1299 let target = extract_branch_target_raw(code, bitmask, pc as usize);
1300 let bc = branch_cost(code, bitmask, target);
1301 FastCost {
1302 cycles: bc as u8,
1303 decode_slots: 1,
1304 exec_unit: EU_ALU,
1305 src_mask: r1(ra),
1306 dst_mask: 0,
1307 is_terminator: true,
1308 is_move_reg: false,
1309 }
1310 }
1311 170..=175 => {
1313 let target = extract_branch_target_raw(code, bitmask, pc as usize);
1314 let bc = branch_cost(code, bitmask, target);
1315 FastCost {
1316 cycles: bc as u8,
1317 decode_slots: 1,
1318 exec_unit: EU_ALU,
1319 src_mask: r2(ra, rb),
1320 dst_mask: 0,
1321 is_terminator: true,
1322 is_move_reg: false,
1323 }
1324 }
1325
1326 200 | 201 | 210 | 211 | 212 => {
1328 let s = r2(rb, rd);
1329 let dc = if dst_src_overlap(ra, s) { 1 } else { 2 };
1330 FastCost {
1331 cycles: 1,
1332 decode_slots: dc,
1333 exec_unit: EU_ALU,
1334 src_mask: s,
1335 dst_mask: r1(ra),
1336 is_terminator: false,
1337 is_move_reg: false,
1338 }
1339 }
1340 190 | 191 => {
1342 let s = r2(rb, rd);
1343 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
1344 FastCost {
1345 cycles: 2,
1346 decode_slots: dc,
1347 exec_unit: EU_ALU,
1348 src_mask: s,
1349 dst_mask: r1(ra),
1350 is_terminator: false,
1351 is_move_reg: false,
1352 }
1353 }
1354 132 | 133 | 134 | 149 | 151 | 152 | 153 | 158 | 110 => {
1356 let dc = if dst_src_overlap(ra, r1(rb)) { 1 } else { 2 };
1357 FastCost {
1358 cycles: 1,
1359 decode_slots: dc,
1360 exec_unit: EU_ALU,
1361 src_mask: r1(rb),
1362 dst_mask: r1(ra),
1363 is_terminator: false,
1364 is_move_reg: false,
1365 }
1366 }
1367 131 | 138 | 139 | 140 | 160 => {
1369 let dc = if dst_src_overlap(ra, r1(rb)) { 2 } else { 3 };
1370 FastCost {
1371 cycles: 2,
1372 decode_slots: dc,
1373 exec_unit: EU_ALU,
1374 src_mask: r1(rb),
1375 dst_mask: r1(ra),
1376 is_terminator: false,
1377 is_move_reg: false,
1378 }
1379 }
1380 102 | 103 | 104 | 105 | 108 | 109 | 111 => FastCost {
1382 cycles: 1,
1383 decode_slots: 1,
1384 exec_unit: EU_ALU,
1385 src_mask: r1(rb),
1386 dst_mask: r1(ra),
1387 is_terminator: false,
1388 is_move_reg: false,
1389 },
1390 106 | 107 => FastCost {
1392 cycles: 2,
1393 decode_slots: 1,
1394 exec_unit: EU_ALU,
1395 src_mask: r1(rb),
1396 dst_mask: r1(ra),
1397 is_terminator: false,
1398 is_move_reg: false,
1399 },
1400
1401 207 | 208 | 209 | 220 | 222 => {
1403 let dc = if rb == ra { 2 } else { 3 };
1404 FastCost {
1405 cycles: 1,
1406 decode_slots: dc,
1407 exec_unit: EU_ALU,
1408 src_mask: r2(rb, rd),
1409 dst_mask: r1(ra),
1410 is_terminator: false,
1411 is_move_reg: false,
1412 }
1413 }
1414 197 | 198 | 199 | 221 | 223 => {
1416 let dc = if rb == ra { 3 } else { 4 };
1417 FastCost {
1418 cycles: 2,
1419 decode_slots: dc,
1420 exec_unit: EU_ALU,
1421 src_mask: r2(rb, rd),
1422 dst_mask: r1(ra),
1423 is_terminator: false,
1424 is_move_reg: false,
1425 }
1426 }
1427 155 | 156 | 157 | 159 => FastCost {
1429 cycles: 1,
1430 decode_slots: 3,
1431 exec_unit: EU_ALU,
1432 src_mask: r1(rb),
1433 dst_mask: r1(ra),
1434 is_terminator: false,
1435 is_move_reg: false,
1436 },
1437 144 | 145 | 146 | 161 => FastCost {
1439 cycles: 2,
1440 decode_slots: 4,
1441 exec_unit: EU_ALU,
1442 src_mask: r1(rb),
1443 dst_mask: r1(ra),
1444 is_terminator: false,
1445 is_move_reg: false,
1446 },
1447
1448 216 | 217 => FastCost {
1450 cycles: 3,
1451 decode_slots: 3,
1452 exec_unit: EU_ALU,
1453 src_mask: r2(rb, rd),
1454 dst_mask: r1(ra),
1455 is_terminator: false,
1456 is_move_reg: false,
1457 },
1458 136 | 137 | 142 | 143 => FastCost {
1460 cycles: 3,
1461 decode_slots: 3,
1462 exec_unit: EU_ALU,
1463 src_mask: r1(rb),
1464 dst_mask: r1(ra),
1465 is_terminator: false,
1466 is_move_reg: false,
1467 },
1468
1469 218 | 219 => FastCost {
1471 cycles: 2,
1472 decode_slots: 2,
1473 exec_unit: EU_ALU,
1474 src_mask: r2(rb, rd),
1475 dst_mask: r1(ra),
1476 is_terminator: false,
1477 is_move_reg: false,
1478 },
1479 147 | 148 => FastCost {
1481 cycles: 2,
1482 decode_slots: 3,
1483 exec_unit: EU_ALU,
1484 src_mask: r1(rb),
1485 dst_mask: r1(ra),
1486 is_terminator: false,
1487 is_move_reg: false,
1488 },
1489
1490 227..=230 => {
1492 let s = r2(rb, rd);
1493 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
1494 FastCost {
1495 cycles: 3,
1496 decode_slots: dc,
1497 exec_unit: EU_ALU,
1498 src_mask: s,
1499 dst_mask: r1(ra),
1500 is_terminator: false,
1501 is_move_reg: false,
1502 }
1503 }
1504 224 | 225 => FastCost {
1506 cycles: 2,
1507 decode_slots: 3,
1508 exec_unit: EU_ALU,
1509 src_mask: r2(rb, rd),
1510 dst_mask: r1(ra),
1511 is_terminator: false,
1512 is_move_reg: false,
1513 },
1514 226 => {
1516 let s = r2(rb, rd);
1517 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
1518 FastCost {
1519 cycles: 2,
1520 decode_slots: dc,
1521 exec_unit: EU_ALU,
1522 src_mask: s,
1523 dst_mask: r1(ra),
1524 is_terminator: false,
1525 is_move_reg: false,
1526 }
1527 }
1528 154 => FastCost {
1530 cycles: 2,
1531 decode_slots: 3,
1532 exec_unit: EU_ALU,
1533 src_mask: r1(rb),
1534 dst_mask: r1(ra),
1535 is_terminator: false,
1536 is_move_reg: false,
1537 },
1538 141 => FastCost {
1539 cycles: 3,
1540 decode_slots: 4,
1541 exec_unit: EU_ALU,
1542 src_mask: r1(rb),
1543 dst_mask: r1(ra),
1544 is_terminator: false,
1545 is_move_reg: false,
1546 },
1547
1548 202 => {
1550 let s = r2(rb, rd);
1551 let dc = if dst_src_overlap(ra, s) { 1 } else { 2 };
1552 FastCost {
1553 cycles: 3,
1554 decode_slots: dc,
1555 exec_unit: EU_MUL,
1556 src_mask: s,
1557 dst_mask: r1(ra),
1558 is_terminator: false,
1559 is_move_reg: false,
1560 }
1561 }
1562 150 => {
1564 let dc = if dst_src_overlap(ra, r1(rb)) { 1 } else { 2 };
1565 FastCost {
1566 cycles: 3,
1567 decode_slots: dc,
1568 exec_unit: EU_MUL,
1569 src_mask: r1(rb),
1570 dst_mask: r1(ra),
1571 is_terminator: false,
1572 is_move_reg: false,
1573 }
1574 }
1575 192 => {
1577 let s = r2(rb, rd);
1578 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
1579 FastCost {
1580 cycles: 4,
1581 decode_slots: dc,
1582 exec_unit: EU_MUL,
1583 src_mask: s,
1584 dst_mask: r1(ra),
1585 is_terminator: false,
1586 is_move_reg: false,
1587 }
1588 }
1589 135 => {
1591 let dc = if dst_src_overlap(ra, r1(rb)) { 2 } else { 3 };
1592 FastCost {
1593 cycles: 4,
1594 decode_slots: dc,
1595 exec_unit: EU_MUL,
1596 src_mask: r1(rb),
1597 dst_mask: r1(ra),
1598 is_terminator: false,
1599 is_move_reg: false,
1600 }
1601 }
1602 213 | 214 => FastCost {
1604 cycles: 4,
1605 decode_slots: 4,
1606 exec_unit: EU_MUL,
1607 src_mask: r2(rb, rd),
1608 dst_mask: r1(ra),
1609 is_terminator: false,
1610 is_move_reg: false,
1611 },
1612 215 => FastCost {
1613 cycles: 6,
1614 decode_slots: 4,
1615 exec_unit: EU_MUL,
1616 src_mask: r2(rb, rd),
1617 dst_mask: r1(ra),
1618 is_terminator: false,
1619 is_move_reg: false,
1620 },
1621
1622 193 | 194 | 195 | 196 | 203 | 204 | 205 | 206 => FastCost {
1624 cycles: 60,
1625 decode_slots: 4,
1626 exec_unit: EU_DIV,
1627 src_mask: r2(rb, rd),
1628 dst_mask: r1(ra),
1629 is_terminator: false,
1630 is_move_reg: false,
1631 },
1632
1633 _ => FastCost {
1635 cycles: 1,
1636 decode_slots: 1,
1637 exec_unit: EU_NONE,
1638 src_mask: 0,
1639 dst_mask: 0,
1640 is_terminator: false,
1641 is_move_reg: false,
1642 },
1643 }
1644}
1645
1646#[inline(always)]
1652pub fn fast_cost_from_decoded(
1653 opcode_byte: u8,
1654 args: &crate::args::Args,
1655 pc: u32,
1656 code: &[u8],
1657 bitmask: &[u8],
1658 mem_cycles: u8,
1659) -> FastCost {
1660 use crate::args::Args;
1661
1662 let pcu = pc as usize;
1666 let ra = if pcu + 1 < code.len() {
1667 code[pcu + 1] & 0x0F
1668 } else {
1669 0xFF
1670 };
1671 let rb = if pcu + 1 < code.len() {
1672 (code[pcu + 1] >> 4) & 0x0F
1673 } else {
1674 0xFF
1675 };
1676 let rd = if pcu + 2 < code.len() {
1677 code[pcu + 2] & 0x0F
1678 } else {
1679 0xFF
1680 };
1681
1682 let branch_target = match args {
1685 Args::RegImmOffset { offset, .. } => *offset as usize,
1686 Args::TwoRegOffset { offset, .. } => *offset as usize,
1687 Args::Offset { offset } => *offset as usize,
1688 _ => pcu,
1689 };
1690
1691 let r1 = |r: u8| reg_bit(r);
1692 let r2 = |a: u8, b: u8| reg_bit(a) | reg_bit(b);
1693 let dst_src_overlap = |dst: u8, s: u16| (reg_bit(dst) & s) != 0;
1694
1695 let opcode = opcode_byte;
1696 match opcode {
1697 0 => FastCost {
1699 cycles: 2,
1700 decode_slots: 1,
1701 exec_unit: EU_NONE,
1702 src_mask: 0,
1703 dst_mask: 0,
1704 is_terminator: true,
1705 is_move_reg: false,
1706 },
1707 1 => FastCost {
1708 cycles: 2,
1709 decode_slots: 1,
1710 exec_unit: EU_NONE,
1711 src_mask: 0,
1712 dst_mask: 0,
1713 is_terminator: true,
1714 is_move_reg: false,
1715 },
1716 2 => FastCost {
1717 cycles: 40,
1718 decode_slots: 1,
1719 exec_unit: EU_NONE,
1720 src_mask: 0,
1721 dst_mask: 0,
1722 is_terminator: true,
1723 is_move_reg: false,
1724 },
1725 3 => FastCost {
1729 cycles: 100,
1730 decode_slots: 4,
1731 exec_unit: EU_ALU,
1732 src_mask: 0,
1733 dst_mask: 0,
1734 is_terminator: true,
1735 is_move_reg: false,
1736 },
1737 10 => FastCost {
1738 cycles: 100,
1739 decode_slots: 4,
1740 exec_unit: EU_ALU,
1741 src_mask: 0,
1742 dst_mask: 0,
1743 is_terminator: true,
1744 is_move_reg: false,
1745 },
1746
1747 40 => FastCost {
1749 cycles: 15,
1750 decode_slots: 1,
1751 exec_unit: EU_ALU,
1752 src_mask: 0,
1753 dst_mask: 0,
1754 is_terminator: true,
1755 is_move_reg: false,
1756 },
1757 80 => FastCost {
1758 cycles: 15,
1759 decode_slots: 1,
1760 exec_unit: EU_ALU,
1761 src_mask: 0,
1762 dst_mask: r1(ra),
1763 is_terminator: true,
1764 is_move_reg: false,
1765 },
1766 50 => FastCost {
1767 cycles: 22,
1768 decode_slots: 1,
1769 exec_unit: EU_ALU,
1770 src_mask: 0,
1771 dst_mask: 0,
1772 is_terminator: true,
1773 is_move_reg: false,
1774 },
1775 180 => FastCost {
1776 cycles: 22,
1777 decode_slots: 1,
1778 exec_unit: EU_ALU,
1779 src_mask: r1(rb),
1780 dst_mask: r1(ra),
1781 is_terminator: true,
1782 is_move_reg: false,
1783 },
1784
1785 52..=58 => FastCost {
1787 cycles: mem_cycles,
1788 decode_slots: 1,
1789 exec_unit: EU_LOAD,
1790 src_mask: r1(rb),
1791 dst_mask: r1(ra),
1792 is_terminator: false,
1793 is_move_reg: false,
1794 },
1795 124..=130 => FastCost {
1796 cycles: mem_cycles,
1797 decode_slots: 1,
1798 exec_unit: EU_LOAD,
1799 src_mask: r1(rb),
1800 dst_mask: r1(ra),
1801 is_terminator: false,
1802 is_move_reg: false,
1803 },
1804
1805 59..=62 => FastCost {
1807 cycles: mem_cycles,
1808 decode_slots: 1,
1809 exec_unit: EU_STORE,
1810 src_mask: r2(ra, rb),
1811 dst_mask: 0,
1812 is_terminator: false,
1813 is_move_reg: false,
1814 },
1815 120..=123 => FastCost {
1816 cycles: mem_cycles,
1817 decode_slots: 1,
1818 exec_unit: EU_STORE,
1819 src_mask: r2(ra, rb),
1820 dst_mask: 0,
1821 is_terminator: false,
1822 is_move_reg: false,
1823 },
1824 30..=33 => FastCost {
1825 cycles: mem_cycles,
1826 decode_slots: 1,
1827 exec_unit: EU_STORE,
1828 src_mask: 0,
1829 dst_mask: 0,
1830 is_terminator: false,
1831 is_move_reg: false,
1832 },
1833 70..=73 => FastCost {
1834 cycles: mem_cycles,
1835 decode_slots: 1,
1836 exec_unit: EU_STORE,
1837 src_mask: r1(ra),
1838 dst_mask: 0,
1839 is_terminator: false,
1840 is_move_reg: false,
1841 },
1842
1843 51 => FastCost {
1845 cycles: 1,
1846 decode_slots: 1,
1847 exec_unit: EU_NONE,
1848 src_mask: 0,
1849 dst_mask: r1(ra),
1850 is_terminator: false,
1851 is_move_reg: false,
1852 },
1853 20 => FastCost {
1854 cycles: 1,
1855 decode_slots: 2,
1856 exec_unit: EU_NONE,
1857 src_mask: 0,
1858 dst_mask: r1(ra),
1859 is_terminator: false,
1860 is_move_reg: false,
1861 },
1862
1863 100 => FastCost {
1865 cycles: 0,
1866 decode_slots: 1,
1867 exec_unit: EU_NONE,
1868 src_mask: r1(rb),
1869 dst_mask: r1(ra),
1870 is_terminator: false,
1871 is_move_reg: true,
1872 },
1873
1874 101 => FastCost {
1875 cycles: 2,
1876 decode_slots: 1,
1877 exec_unit: EU_NONE,
1878 src_mask: 0,
1879 dst_mask: 0,
1880 is_terminator: false,
1881 is_move_reg: false,
1882 },
1883
1884 81..=90 => {
1886 let bc = branch_cost(code, bitmask, branch_target);
1887 FastCost {
1888 cycles: bc as u8,
1889 decode_slots: 1,
1890 exec_unit: EU_ALU,
1891 src_mask: r1(ra),
1892 dst_mask: 0,
1893 is_terminator: true,
1894 is_move_reg: false,
1895 }
1896 }
1897 170..=175 => {
1899 let bc = branch_cost(code, bitmask, branch_target);
1900 FastCost {
1901 cycles: bc as u8,
1902 decode_slots: 1,
1903 exec_unit: EU_ALU,
1904 src_mask: r2(ra, rb),
1905 dst_mask: 0,
1906 is_terminator: true,
1907 is_move_reg: false,
1908 }
1909 }
1910
1911 200 | 201 | 210 | 211 | 212 => {
1913 let s = r2(rb, rd);
1914 let dc = if dst_src_overlap(ra, s) { 1 } else { 2 };
1915 FastCost {
1916 cycles: 1,
1917 decode_slots: dc,
1918 exec_unit: EU_ALU,
1919 src_mask: s,
1920 dst_mask: r1(ra),
1921 is_terminator: false,
1922 is_move_reg: false,
1923 }
1924 }
1925 190 | 191 => {
1927 let s = r2(rb, rd);
1928 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
1929 FastCost {
1930 cycles: 2,
1931 decode_slots: dc,
1932 exec_unit: EU_ALU,
1933 src_mask: s,
1934 dst_mask: r1(ra),
1935 is_terminator: false,
1936 is_move_reg: false,
1937 }
1938 }
1939 132 | 133 | 134 | 149 | 151 | 152 | 153 | 158 | 110 => {
1941 let dc = if dst_src_overlap(ra, r1(rb)) { 1 } else { 2 };
1942 FastCost {
1943 cycles: 1,
1944 decode_slots: dc,
1945 exec_unit: EU_ALU,
1946 src_mask: r1(rb),
1947 dst_mask: r1(ra),
1948 is_terminator: false,
1949 is_move_reg: false,
1950 }
1951 }
1952 131 | 138 | 139 | 140 | 160 => {
1954 let dc = if dst_src_overlap(ra, r1(rb)) { 2 } else { 3 };
1955 FastCost {
1956 cycles: 2,
1957 decode_slots: dc,
1958 exec_unit: EU_ALU,
1959 src_mask: r1(rb),
1960 dst_mask: r1(ra),
1961 is_terminator: false,
1962 is_move_reg: false,
1963 }
1964 }
1965 102 | 103 | 104 | 105 | 108 | 109 | 111 => FastCost {
1967 cycles: 1,
1968 decode_slots: 1,
1969 exec_unit: EU_ALU,
1970 src_mask: r1(rb),
1971 dst_mask: r1(ra),
1972 is_terminator: false,
1973 is_move_reg: false,
1974 },
1975 106 | 107 => FastCost {
1977 cycles: 2,
1978 decode_slots: 1,
1979 exec_unit: EU_ALU,
1980 src_mask: r1(rb),
1981 dst_mask: r1(ra),
1982 is_terminator: false,
1983 is_move_reg: false,
1984 },
1985
1986 207 | 208 | 209 | 220 | 222 => {
1988 let dc = if rb == ra { 2 } else { 3 };
1989 FastCost {
1990 cycles: 1,
1991 decode_slots: dc,
1992 exec_unit: EU_ALU,
1993 src_mask: r2(rb, rd),
1994 dst_mask: r1(ra),
1995 is_terminator: false,
1996 is_move_reg: false,
1997 }
1998 }
1999 197 | 198 | 199 | 221 | 223 => {
2001 let dc = if rb == ra { 3 } else { 4 };
2002 FastCost {
2003 cycles: 2,
2004 decode_slots: dc,
2005 exec_unit: EU_ALU,
2006 src_mask: r2(rb, rd),
2007 dst_mask: r1(ra),
2008 is_terminator: false,
2009 is_move_reg: false,
2010 }
2011 }
2012 155 | 156 | 157 | 159 => FastCost {
2014 cycles: 1,
2015 decode_slots: 3,
2016 exec_unit: EU_ALU,
2017 src_mask: r1(rb),
2018 dst_mask: r1(ra),
2019 is_terminator: false,
2020 is_move_reg: false,
2021 },
2022 144 | 145 | 146 | 161 => FastCost {
2024 cycles: 2,
2025 decode_slots: 4,
2026 exec_unit: EU_ALU,
2027 src_mask: r1(rb),
2028 dst_mask: r1(ra),
2029 is_terminator: false,
2030 is_move_reg: false,
2031 },
2032
2033 216 | 217 => FastCost {
2035 cycles: 3,
2036 decode_slots: 3,
2037 exec_unit: EU_ALU,
2038 src_mask: r2(rb, rd),
2039 dst_mask: r1(ra),
2040 is_terminator: false,
2041 is_move_reg: false,
2042 },
2043 136 | 137 | 142 | 143 => FastCost {
2045 cycles: 3,
2046 decode_slots: 3,
2047 exec_unit: EU_ALU,
2048 src_mask: r1(rb),
2049 dst_mask: r1(ra),
2050 is_terminator: false,
2051 is_move_reg: false,
2052 },
2053
2054 218 | 219 => FastCost {
2056 cycles: 2,
2057 decode_slots: 2,
2058 exec_unit: EU_ALU,
2059 src_mask: r2(rb, rd),
2060 dst_mask: r1(ra),
2061 is_terminator: false,
2062 is_move_reg: false,
2063 },
2064 147 | 148 => FastCost {
2066 cycles: 2,
2067 decode_slots: 3,
2068 exec_unit: EU_ALU,
2069 src_mask: r1(rb),
2070 dst_mask: r1(ra),
2071 is_terminator: false,
2072 is_move_reg: false,
2073 },
2074
2075 227..=230 => {
2077 let s = r2(rb, rd);
2078 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
2079 FastCost {
2080 cycles: 3,
2081 decode_slots: dc,
2082 exec_unit: EU_ALU,
2083 src_mask: s,
2084 dst_mask: r1(ra),
2085 is_terminator: false,
2086 is_move_reg: false,
2087 }
2088 }
2089 224 | 225 => FastCost {
2091 cycles: 2,
2092 decode_slots: 3,
2093 exec_unit: EU_ALU,
2094 src_mask: r2(rb, rd),
2095 dst_mask: r1(ra),
2096 is_terminator: false,
2097 is_move_reg: false,
2098 },
2099 226 => {
2101 let s = r2(rb, rd);
2102 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
2103 FastCost {
2104 cycles: 2,
2105 decode_slots: dc,
2106 exec_unit: EU_ALU,
2107 src_mask: s,
2108 dst_mask: r1(ra),
2109 is_terminator: false,
2110 is_move_reg: false,
2111 }
2112 }
2113 154 => FastCost {
2115 cycles: 2,
2116 decode_slots: 3,
2117 exec_unit: EU_ALU,
2118 src_mask: r1(rb),
2119 dst_mask: r1(ra),
2120 is_terminator: false,
2121 is_move_reg: false,
2122 },
2123 141 => FastCost {
2124 cycles: 3,
2125 decode_slots: 4,
2126 exec_unit: EU_ALU,
2127 src_mask: r1(rb),
2128 dst_mask: r1(ra),
2129 is_terminator: false,
2130 is_move_reg: false,
2131 },
2132
2133 202 => {
2135 let s = r2(rb, rd);
2136 let dc = if dst_src_overlap(ra, s) { 1 } else { 2 };
2137 FastCost {
2138 cycles: 3,
2139 decode_slots: dc,
2140 exec_unit: EU_MUL,
2141 src_mask: s,
2142 dst_mask: r1(ra),
2143 is_terminator: false,
2144 is_move_reg: false,
2145 }
2146 }
2147 150 => {
2149 let dc = if dst_src_overlap(ra, r1(rb)) { 1 } else { 2 };
2150 FastCost {
2151 cycles: 3,
2152 decode_slots: dc,
2153 exec_unit: EU_MUL,
2154 src_mask: r1(rb),
2155 dst_mask: r1(ra),
2156 is_terminator: false,
2157 is_move_reg: false,
2158 }
2159 }
2160 192 => {
2162 let s = r2(rb, rd);
2163 let dc = if dst_src_overlap(ra, s) { 2 } else { 3 };
2164 FastCost {
2165 cycles: 4,
2166 decode_slots: dc,
2167 exec_unit: EU_MUL,
2168 src_mask: s,
2169 dst_mask: r1(ra),
2170 is_terminator: false,
2171 is_move_reg: false,
2172 }
2173 }
2174 135 => {
2176 let dc = if dst_src_overlap(ra, r1(rb)) { 2 } else { 3 };
2177 FastCost {
2178 cycles: 4,
2179 decode_slots: dc,
2180 exec_unit: EU_MUL,
2181 src_mask: r1(rb),
2182 dst_mask: r1(ra),
2183 is_terminator: false,
2184 is_move_reg: false,
2185 }
2186 }
2187 213 | 214 => FastCost {
2189 cycles: 4,
2190 decode_slots: 4,
2191 exec_unit: EU_MUL,
2192 src_mask: r2(rb, rd),
2193 dst_mask: r1(ra),
2194 is_terminator: false,
2195 is_move_reg: false,
2196 },
2197 215 => FastCost {
2198 cycles: 6,
2199 decode_slots: 4,
2200 exec_unit: EU_MUL,
2201 src_mask: r2(rb, rd),
2202 dst_mask: r1(ra),
2203 is_terminator: false,
2204 is_move_reg: false,
2205 },
2206
2207 193 | 194 | 195 | 196 | 203 | 204 | 205 | 206 => FastCost {
2209 cycles: 60,
2210 decode_slots: 4,
2211 exec_unit: EU_DIV,
2212 src_mask: r2(rb, rd),
2213 dst_mask: r1(ra),
2214 is_terminator: false,
2215 is_move_reg: false,
2216 },
2217
2218 _ => FastCost {
2220 cycles: 1,
2221 decode_slots: 1,
2222 exec_unit: EU_NONE,
2223 src_mask: 0,
2224 dst_mask: 0,
2225 is_terminator: false,
2226 is_move_reg: false,
2227 },
2228 }
2229}
2230
/// One row of `GAS_COST_LUT`: the static cost description of a single opcode.
#[derive(Clone, Copy)]
struct GasCostEntry {
    /// Execution latency. Readers of the table substitute the caller-supplied
    /// memory latency for load/store entries and the computed branch cost for
    /// branch entries.
    cycles: u8,
    /// Decode slots consumed by entries that do not carry `F_OVERLAP`.
    decode_slots: u8,
    /// One of the `EU_*` execution-unit codes.
    exec_unit: u8,
    /// Source-register pattern: 0 = none, 1 = `ra`, 2 = `rb`,
    /// 3 = `ra` and `rb`, 4 = `rb` and `rd`.
    src_pat: u8,
    /// Destination-register pattern: 0 = none, 1 = `ra`
    /// (`feed_gas_direct` additionally maps 2 to `rd`).
    dst_pat: u8,
    /// Bit set of `F_*` flags.
    flags: u8,
    /// Packed decode-slot counts for `F_OVERLAP` entries: the low nibble
    /// applies when the overlap test succeeds, the high nibble otherwise.
    overlap_slots: u8,
}
2251
2252const F_TERM: u8 = 1;
2253const F_MOVE: u8 = 2;
2254const F_BRANCH: u8 = 4;
2255const F_OVERLAP: u8 = 8;
2256const F_BRANCH2: u8 = 16; const F_SHIFT_OVERLAP: u8 = 32; const fn gc(
2260 cycles: u8,
2261 decode_slots: u8,
2262 exec_unit: u8,
2263 src_pat: u8,
2264 dst_pat: u8,
2265 flags: u8,
2266) -> GasCostEntry {
2267 GasCostEntry {
2268 cycles,
2269 decode_slots,
2270 exec_unit,
2271 src_pat,
2272 dst_pat,
2273 flags,
2274 overlap_slots: 0,
2275 }
2276}
2277const fn gc_ov(
2278 cycles: u8,
2279 overlap_if: u8,
2280 overlap_no: u8,
2281 exec_unit: u8,
2282 src_pat: u8,
2283 dst_pat: u8,
2284 flags: u8,
2285) -> GasCostEntry {
2286 GasCostEntry {
2287 cycles,
2288 decode_slots: 0,
2289 exec_unit,
2290 src_pat,
2291 dst_pat,
2292 flags: flags | F_OVERLAP,
2293 overlap_slots: overlap_if | (overlap_no << 4),
2294 }
2295}
2296
2297static GAS_COST_LUT: [GasCostEntry; 256] = {
2298 let d = gc(1, 1, EU_NONE, 0, 0, 0); let mut t = [d; 256];
2300 t[0] = gc(2, 1, EU_NONE, 0, 0, F_TERM);
2302 t[1] = gc(2, 1, EU_NONE, 0, 0, F_TERM);
2303 t[2] = gc(40, 1, EU_NONE, 0, 0, F_TERM);
2304 t[3] = gc(100, 4, EU_ALU, 0, 0, F_TERM);
2308 t[10] = gc(100, 4, EU_ALU, 0, 0, F_TERM);
2309 t[40] = gc(15, 1, EU_ALU, 0, 0, F_TERM);
2311 t[80] = gc(15, 1, EU_ALU, 0, 1, F_TERM); t[50] = gc(22, 1, EU_ALU, 0, 0, F_TERM);
2313 t[180] = gc(22, 1, EU_ALU, 2, 1, F_TERM); let mut i = 52;
2316 while i <= 58 {
2317 t[i] = gc(25, 1, EU_LOAD, 2, 1, 0);
2318 i += 1;
2319 }
2320 i = 124;
2321 while i <= 130 {
2322 t[i] = gc(25, 1, EU_LOAD, 2, 1, 0);
2323 i += 1;
2324 }
2325 i = 59;
2327 while i <= 62 {
2328 t[i] = gc(25, 1, EU_STORE, 3, 0, 0);
2329 i += 1;
2330 }
2331 i = 120;
2332 while i <= 123 {
2333 t[i] = gc(25, 1, EU_STORE, 3, 0, 0);
2334 i += 1;
2335 }
2336 i = 30;
2337 while i <= 33 {
2338 t[i] = gc(25, 1, EU_STORE, 0, 0, 0);
2339 i += 1;
2340 }
2341 i = 70;
2342 while i <= 73 {
2343 t[i] = gc(25, 1, EU_STORE, 1, 0, 0);
2344 i += 1;
2345 } t[51] = gc(1, 1, EU_NONE, 0, 1, 0);
2348 t[20] = gc(1, 2, EU_NONE, 0, 1, 0);
2349 t[100] = gc(0, 1, EU_NONE, 2, 1, F_MOVE); t[101] = gc(2, 1, EU_NONE, 0, 0, 0); i = 81;
2354 while i <= 90 {
2355 t[i] = gc(0, 1, EU_ALU, 1, 0, F_TERM | F_BRANCH);
2356 i += 1;
2357 } i = 170;
2360 while i <= 175 {
2361 t[i] = gc(0, 1, EU_ALU, 3, 0, F_TERM | F_BRANCH2);
2362 i += 1;
2363 } t[200] = gc_ov(1, 1, 2, EU_ALU, 4, 1, 0);
2366 t[201] = gc_ov(1, 1, 2, EU_ALU, 4, 1, 0);
2367 t[210] = gc_ov(1, 1, 2, EU_ALU, 4, 1, 0);
2368 t[211] = gc_ov(1, 1, 2, EU_ALU, 4, 1, 0);
2369 t[212] = gc_ov(1, 1, 2, EU_ALU, 4, 1, 0);
2370 t[190] = gc_ov(2, 2, 3, EU_ALU, 4, 1, 0);
2372 t[191] = gc_ov(2, 2, 3, EU_ALU, 4, 1, 0);
2373 {
2375 let e = gc_ov(1, 1, 2, EU_ALU, 2, 1, 0);
2376 t[132] = e;
2377 t[133] = e;
2378 t[134] = e;
2379 t[149] = e;
2380 t[151] = e;
2381 t[152] = e;
2382 t[153] = e;
2383 t[158] = e;
2384 t[110] = e;
2385 }
2386 {
2388 let e = gc_ov(2, 2, 3, EU_ALU, 2, 1, 0);
2389 t[131] = e;
2390 t[138] = e;
2391 t[139] = e;
2392 t[140] = e;
2393 t[160] = e;
2394 }
2395 {
2397 let e = gc(1, 1, EU_ALU, 2, 1, 0);
2398 t[102] = e;
2399 t[103] = e;
2400 t[104] = e;
2401 t[105] = e;
2402 t[108] = e;
2403 t[109] = e;
2404 t[111] = e;
2405 }
2406 t[106] = gc(2, 1, EU_ALU, 2, 1, 0);
2408 t[107] = gc(2, 1, EU_ALU, 2, 1, 0);
2409 {
2411 let e = gc_ov(1, 2, 3, EU_ALU, 4, 1, F_SHIFT_OVERLAP);
2412 t[207] = e;
2413 t[208] = e;
2414 t[209] = e;
2415 t[220] = e;
2416 t[222] = e;
2417 }
2418 {
2420 let e = gc_ov(2, 3, 4, EU_ALU, 4, 1, F_SHIFT_OVERLAP);
2421 t[197] = e;
2422 t[198] = e;
2423 t[199] = e;
2424 t[221] = e;
2425 t[223] = e;
2426 }
2427 {
2429 let e = gc(1, 3, EU_ALU, 2, 1, 0);
2430 t[155] = e;
2431 t[156] = e;
2432 t[157] = e;
2433 t[159] = e;
2434 }
2435 {
2437 let e = gc(2, 4, EU_ALU, 2, 1, 0);
2438 t[144] = e;
2439 t[145] = e;
2440 t[146] = e;
2441 t[161] = e;
2442 }
2443 t[216] = gc(3, 3, EU_ALU, 4, 1, 0);
2445 t[217] = gc(3, 3, EU_ALU, 4, 1, 0);
2446 {
2448 let e = gc(3, 3, EU_ALU, 2, 1, 0);
2449 t[136] = e;
2450 t[137] = e;
2451 t[142] = e;
2452 t[143] = e;
2453 }
2454 t[218] = gc(2, 2, EU_ALU, 4, 1, 0);
2456 t[219] = gc(2, 2, EU_ALU, 4, 1, 0);
2457 t[147] = gc(2, 3, EU_ALU, 2, 1, 0);
2459 t[148] = gc(2, 3, EU_ALU, 2, 1, 0);
2460 {
2462 let e = gc_ov(3, 2, 3, EU_ALU, 4, 1, 0);
2463 t[227] = e;
2464 t[228] = e;
2465 t[229] = e;
2466 t[230] = e;
2467 }
2468 t[224] = gc(2, 3, EU_ALU, 4, 1, 0);
2470 t[225] = gc(2, 3, EU_ALU, 4, 1, 0);
2471 t[226] = gc_ov(2, 2, 3, EU_ALU, 4, 1, 0);
2473 t[154] = gc(2, 3, EU_ALU, 2, 1, 0);
2475 t[141] = gc(3, 4, EU_ALU, 2, 1, 0);
2476 t[202] = gc_ov(3, 1, 2, EU_MUL, 4, 1, 0);
2478 t[150] = gc_ov(3, 1, 2, EU_MUL, 2, 1, 0);
2480 t[192] = gc_ov(4, 2, 3, EU_MUL, 4, 1, 0);
2482 t[135] = gc_ov(4, 2, 3, EU_MUL, 2, 1, 0);
2484 t[213] = gc(4, 4, EU_MUL, 4, 1, 0);
2486 t[214] = gc(4, 4, EU_MUL, 4, 1, 0);
2487 t[215] = gc(6, 4, EU_MUL, 4, 1, 0);
2488 {
2490 let e = gc(60, 4, EU_DIV, 4, 1, 0);
2491 t[193] = e;
2492 t[194] = e;
2493 t[195] = e;
2494 t[196] = e;
2495 t[203] = e;
2496 t[204] = e;
2497 t[205] = e;
2498 t[206] = e;
2499 }
2500 t
2501};
2502
2503#[inline(always)]
2507pub fn feed_gas_direct(
2508 opcode_byte: u8,
2509 ra: u8,
2510 rb: u8,
2511 rd: u8,
2512 gas_sim: &mut crate::gas_sim::GasSimulator,
2513 mem_cycles: u8,
2514) -> (bool, bool) {
2515 let entry = &GAS_COST_LUT[opcode_byte as usize];
2516 let flags = entry.flags;
2517
2518 if flags & (F_BRANCH | F_BRANCH2 | F_OVERLAP | F_MOVE | F_SHIFT_OVERLAP) == 0 {
2520 let (src1, src2) = match entry.src_pat {
2522 0 => (0xFF, 0xFF),
2523 1 => (ra.min(12), 0xFF),
2524 2 => (rb.min(12), 0xFF),
2525 3 => (ra.min(12), rb.min(12)),
2526 4 => (rb.min(12), rd.min(12)),
2527 _ => (0xFF, 0xFF),
2528 };
2529 let dst = if entry.dst_pat == 1 {
2530 ra.min(12)
2531 } else if entry.dst_pat == 2 {
2532 rd.min(12)
2533 } else {
2534 0xFF
2535 };
2536 let cycles = if entry.exec_unit == EU_LOAD || entry.exec_unit == EU_STORE {
2538 mem_cycles
2539 } else {
2540 entry.cycles
2541 };
2542 gas_sim.feed_direct(cycles, entry.decode_slots, src1, src2, dst);
2543 return (flags & F_TERM != 0, false);
2544 }
2545
2546 (flags & F_TERM != 0, true)
2548}
2549
2550#[inline(always)]
2553pub fn fast_cost_lut(
2554 opcode_byte: u8,
2555 args: &crate::args::Args,
2556 pc: u32,
2557 code: &[u8],
2558 bitmask: &[u8],
2559 mem_cycles: u8,
2560) -> FastCost {
2561 let pcu = pc as usize;
2562 let reg_byte1 = if pcu + 1 < code.len() {
2563 code[pcu + 1]
2564 } else {
2565 0xFF
2566 };
2567 let ra = reg_byte1 & 0x0F;
2568 let rb = (reg_byte1 >> 4) & 0x0F;
2569 let rd = if pcu + 2 < code.len() {
2570 code[pcu + 2] & 0x0F
2571 } else {
2572 0xFF
2573 };
2574
2575 fast_cost_lut_inner(
2576 opcode_byte,
2577 args,
2578 pcu,
2579 code,
2580 bitmask,
2581 ra,
2582 rb,
2583 rd,
2584 mem_cycles,
2585 )
2586}
2587
2588#[inline(always)]
2591#[allow(clippy::too_many_arguments)]
2592pub fn fast_cost_lut_regs(
2593 opcode_byte: u8,
2594 args: &crate::args::Args,
2595 pc: usize,
2596 code: &[u8],
2597 bitmask: &[u8],
2598 ra: u8,
2599 rb: u8,
2600 rd: u8,
2601 mem_cycles: u8,
2602) -> FastCost {
2603 fast_cost_lut_inner(opcode_byte, args, pc, code, bitmask, ra, rb, rd, mem_cycles)
2604}
2605
2606#[inline(always)]
2609#[allow(clippy::too_many_arguments)]
2610fn fast_cost_lut_inner(
2611 opcode_byte: u8,
2612 args: &crate::args::Args,
2613 pcu: usize,
2614 code: &[u8],
2615 bitmask: &[u8],
2616 ra: u8,
2617 rb: u8,
2618 rd: u8,
2619 mem_cycles: u8,
2620) -> FastCost {
2621 use crate::args::Args;
2622
2623 let entry = &GAS_COST_LUT[opcode_byte as usize];
2624 let flags = entry.flags;
2625
2626 if flags & (F_BRANCH | F_BRANCH2 | F_OVERLAP) == 0 {
2629 let ra_bit = 1u16 << ra.min(12);
2632 let rb_bit = 1u16 << rb.min(12);
2633 let rd_bit = 1u16 << rd.min(12);
2634 let src_mask: u16 = match entry.src_pat {
2635 0 => 0,
2636 1 => ra_bit,
2637 2 => rb_bit,
2638 3 => ra_bit | rb_bit,
2639 4 => rb_bit | rd_bit,
2640 _ => 0,
2641 };
2642 let dst_mask: u16 = if entry.dst_pat == 1 { ra_bit } else { 0 };
2643 let cycles = if entry.exec_unit == EU_LOAD || entry.exec_unit == EU_STORE {
2644 mem_cycles
2645 } else {
2646 entry.cycles
2647 };
2648 return FastCost {
2649 cycles,
2650 decode_slots: entry.decode_slots,
2651 exec_unit: entry.exec_unit,
2652 src_mask,
2653 dst_mask,
2654 is_terminator: flags & F_TERM != 0,
2655 is_move_reg: flags & F_MOVE != 0,
2656 };
2657 }
2658
2659 let ra_bit = 1u16 << ra.min(12);
2661 let rb_bit = 1u16 << rb.min(12);
2662 let rd_bit = 1u16 << rd.min(12);
2663
2664 let src_mask: u16 = match entry.src_pat {
2665 0 => 0,
2666 1 => ra_bit,
2667 2 => rb_bit,
2668 3 => ra_bit | rb_bit,
2669 4 => rb_bit | rd_bit,
2670 _ => 0,
2671 };
2672 let dst_mask: u16 = if entry.dst_pat == 1 { ra_bit } else { 0 };
2673
2674 let cycles = if flags & (F_BRANCH | F_BRANCH2) != 0 {
2675 let branch_target = match args {
2676 Args::RegImmOffset { offset, .. } => *offset as usize,
2677 Args::TwoRegOffset { offset, .. } => *offset as usize,
2678 Args::Offset { offset } => *offset as usize,
2679 _ => pcu,
2680 };
2681 branch_cost(code, bitmask, branch_target) as u8
2682 } else if entry.exec_unit == EU_LOAD || entry.exec_unit == EU_STORE {
2683 mem_cycles
2684 } else {
2685 entry.cycles
2686 };
2687
2688 let decode_slots = if flags & F_OVERLAP != 0 {
2689 let overlap = if flags & F_SHIFT_OVERLAP != 0 {
2690 rb == ra
2691 } else {
2692 (dst_mask & src_mask) != 0
2693 };
2694 if overlap {
2695 entry.overlap_slots & 0x0F
2696 } else {
2697 entry.overlap_slots >> 4
2698 }
2699 } else {
2700 entry.decode_slots
2701 };
2702
2703 FastCost {
2704 cycles,
2705 decode_slots,
2706 exec_unit: entry.exec_unit,
2707 src_mask,
2708 dst_mask,
2709 is_terminator: flags & F_TERM != 0,
2710 is_move_reg: flags & F_MOVE != 0,
2711 }
2712}
2713
/// Reports whether the execution unit `eu` can accept an instruction given
/// the remaining per-cycle capacities in `avail`.
///
/// `avail` is indexed `[alu, load, store, mul, div]`. `EU_NONE` is always
/// available, `EU_ALU` needs one ALU slot, and every other unit needs one ALU
/// slot plus one slot of its own kind. Unknown unit codes are never available.
#[inline(always)]
// Only called from `gas_sim_fast`, which is compiled on linux/x86_64 only.
#[allow(dead_code)]
fn eu_available(avail: &[u8; 5], eu: u8) -> bool {
    let own_unit = match eu {
        EU_NONE => return true,
        EU_ALU => None,
        EU_LOAD => Some(1),
        EU_STORE => Some(2),
        EU_MUL => Some(3),
        EU_DIV => Some(4),
        _ => return false,
    };
    if avail[0] < 1 {
        return false;
    }
    match own_unit {
        Some(idx) => avail[idx] >= 1,
        None => true,
    }
}
2728
/// Deducts the capacity that execution unit `eu` occupies from `avail`.
///
/// `avail` is indexed `[alu, load, store, mul, div]`. `EU_ALU` takes one ALU
/// slot; load, store, mul and div each take one ALU slot plus one slot of
/// their own kind. `EU_NONE` and unknown codes leave `avail` untouched.
///
/// No availability check is made here, so callers are expected to have
/// checked `eu_available` first; otherwise the subtraction underflows.
#[inline(always)]
// Only called from `gas_sim_fast`, which is compiled on linux/x86_64 only.
#[allow(dead_code)]
fn eu_consume(avail: &mut [u8; 5], eu: u8) {
    let own_unit = match eu {
        EU_ALU => None,
        EU_LOAD => Some(1),
        EU_STORE => Some(2),
        EU_MUL => Some(3),
        EU_DIV => Some(4),
        _ => return,
    };
    avail[0] -= 1;
    if let Some(idx) = own_unit {
        avail[idx] -= 1;
    }
}
2756
/// Advances every executing slot by one cycle.
///
/// For each bit set in `exe_mask`, the matching `cycles_left` counter is
/// decremented. A slot whose counter was 0 or 1 is finished: its counter is
/// set to 0, its bit is cleared from `exe_mask` and set in `fin_mask`.
/// Slots not present in `exe_mask` are left untouched.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn advance_cycle(cycles_left: &mut [u8; 32], exe_mask: &mut u32, fin_mask: &mut u32) {
    let mut pending = *exe_mask;
    let mut completed: u32 = 0;

    while pending != 0 {
        // Isolate and remove the lowest set bit.
        let lowest = pending & pending.wrapping_neg();
        pending ^= lowest;

        let remaining = &mut cycles_left[lowest.trailing_zeros() as usize];
        if *remaining > 1 {
            *remaining -= 1;
        } else {
            *remaining = 0;
            completed |= lowest;
        }
    }

    *exe_mask &= !completed;
    *fin_mask |= completed;
}
2776
2777#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
2778fn gas_sim_fast(
2779 instrs: &[crate::predecoded::PreDecodedInst],
2780 _code: &[u8],
2781 _bitmask: &[u8],
2782) -> u32 {
2783 let mut state = [0u8; 32]; let mut cycles_left = [0u8; 32];
2785 let mut exec_unit = [0u8; 32];
2786 let mut deps = [0u32; 32];
2787 let mut reg_writer = [0xFFu8; 16];
2788
2789 let mut fin_mask: u32 = 0;
2790 let mut wait_mask: u32 = 0;
2791 let mut exe_mask: u32 = 0;
2792
2793 let mut next_slot: u8 = 0;
2794 let mut instr_idx: usize = 0;
2795 let mut cycles: u32 = 0;
2796 let mut decode_slots: u8 = 4;
2797 let mut dispatch_slots: u8 = 5;
2798 let mut eu_avail: [u8; 5] = [4, 4, 4, 1, 1]; for _safety in 0..100_000u32 {
2801 while instr_idx < instrs.len() && decode_slots > 0 && (next_slot as usize) < 32 {
2802 let ii = &instrs[instr_idx];
2803 let cost = fast_cost_from_raw(
2804 ii.opcode as u8,
2805 ii.ra,
2806 ii.rb,
2807 ii.rd,
2808 ii.pc,
2809 _code,
2810 _bitmask,
2811 DEFAULT_MEM_CYCLES,
2812 );
2813
2814 if cost.is_move_reg {
2815 decode_slots = decode_slots.saturating_sub(cost.decode_slots);
2816 instr_idx = if cost.is_terminator {
2817 instrs.len()
2818 } else {
2819 instr_idx + 1
2820 };
2821 continue;
2822 }
2823
2824 let mut dep_mask: u32 = 0;
2825 let mut src = cost.src_mask;
2826 while src != 0 {
2827 let reg = src.trailing_zeros() as usize;
2828 src &= src - 1;
2829 let writer = reg_writer[reg];
2830 if writer != 0xFF && (fin_mask & (1u32 << writer)) == 0 {
2831 dep_mask |= 1u32 << writer;
2832 }
2833 }
2834
2835 let slot = next_slot as usize;
2836 state[slot] = 1; cycles_left[slot] = cost.cycles;
2838 exec_unit[slot] = cost.exec_unit;
2839 deps[slot] = dep_mask;
2840 wait_mask |= 1u32 << slot;
2841
2842 let mut dst = cost.dst_mask;
2843 while dst != 0 {
2844 let reg = dst.trailing_zeros() as usize;
2845 dst &= dst - 1;
2846 reg_writer[reg] = next_slot;
2847 }
2848
2849 next_slot += 1;
2850 decode_slots = decode_slots.saturating_sub(cost.decode_slots);
2851 instr_idx = if cost.is_terminator {
2852 instrs.len()
2853 } else {
2854 instr_idx + 1
2855 };
2856 }
2857
2858 while dispatch_slots > 0 {
2859 let mut candidates = wait_mask;
2860 let mut found = false;
2861 while candidates != 0 {
2862 let i = candidates.trailing_zeros() as usize;
2863 candidates &= candidates - 1;
2864 if (deps[i] & !fin_mask) == 0 && eu_available(&eu_avail, exec_unit[i]) {
2865 eu_consume(&mut eu_avail, exec_unit[i]);
2866 state[i] = 2; wait_mask &= !(1u32 << i);
2868 exe_mask |= 1u32 << i;
2869 dispatch_slots -= 1;
2870 found = true;
2871 break;
2872 }
2873 }
2874 if !found {
2875 break;
2876 }
2877 }
2878
2879 if instr_idx >= instrs.len() && exe_mask == 0 && wait_mask == 0 {
2880 break;
2881 }
2882
2883 advance_cycle(&mut cycles_left, &mut exe_mask, &mut fin_mask);
2884
2885 cycles += 1;
2886 decode_slots = 4;
2887 dispatch_slots = 5;
2888 eu_avail = [4, 4, 4, 1, 1];
2889 }
2890
2891 let _ = state;
2892 cycles
2893}
2894
2895#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
2897pub fn gas_cost_for_block_fast(
2898 instrs: &[crate::predecoded::PreDecodedInst],
2899 code: &[u8],
2900 bitmask: &[u8],
2901) -> u64 {
2902 let cycles = gas_sim_fast(instrs, code, bitmask);
2903 if cycles > 3 { (cycles - 3) as u64 } else { 1 }
2904}
2905
#[cfg(test)]
mod tests {
    use super::*;
    use crate::gas_sim::GasSimulator;

    /// Feeds the instructions of the block starting at offset 0 into a fresh
    /// `GasSimulator`, stopping after the first terminator, and returns the
    /// simulator's final cost.
    fn block_cost(code: &[u8], bitmask: &[u8]) -> u32 {
        let mut sim = GasSimulator::new();
        let mut pc: usize = 0;

        while let Some(&opcode_byte) = code.get(pc) {
            // Skip offsets the bitmask does not mark with 1.
            if matches!(bitmask.get(pc), Some(&mark) if mark != 1) {
                pc += 1;
                continue;
            }

            let (raw_ra, raw_rb) = match code.get(pc + 1) {
                Some(&byte) => (byte & 0x0F, (byte >> 4) & 0x0F),
                None => (0xFF, 0xFF),
            };
            let raw_rd = match code.get(pc + 2) {
                Some(&byte) => byte & 0x0F,
                None => 0xFF,
            };

            let fc = fast_cost_from_raw(
                opcode_byte,
                raw_ra,
                raw_rb,
                raw_rd,
                pc as u32,
                code,
                bitmask,
                DEFAULT_MEM_CYCLES,
            );
            sim.feed(&fc);
            if fc.is_terminator {
                break;
            }

            pc += 1 + skip_distance(bitmask, pc);
        }

        sim.flush_and_get_cost()
    }

    #[test]
    fn test_single_trap() {
        let cost = block_cost(&[0u8], &[1u8]);
        assert_eq!(cost, 1);
    }

    #[test]
    fn test_single_ecalli() {
        let cost = block_cost(&[10u8, 0], &[1, 0]);
        assert_eq!(cost, 97);
    }

    #[test]
    fn test_single_jump() {
        let cost = block_cost(&[40u8, 0], &[1, 0]);
        assert_eq!(cost, 12);
    }

    #[test]
    fn test_single_fallthrough() {
        let cost = block_cost(&[1u8], &[1]);
        assert_eq!(cost, 1);
    }

    #[test]
    fn test_load_imm_then_trap() {
        let code = [51, 0, 42, 0];
        let bitmask = [1, 0, 0, 1];
        let cost = block_cost(&code, &bitmask);
        assert!(cost >= 1, "cost should be >= 1, got {}", cost);
    }
}
2986
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Maps a generated index in `0..6` to the matching `ExecUnits` request.
    fn unit_request(choice: u8) -> ExecUnits {
        match choice {
            0 => ExecUnits::NONE,
            1 => ExecUnits::ALU,
            2 => ExecUnits::LOAD,
            3 => ExecUnits::STORE,
            4 => ExecUnits::MUL,
            5 => ExecUnits::DIV,
            _ => unreachable!(),
        }
    }

    proptest! {
        // Any byte string treated as a block starting at offset 0 costs at least 1.
        #[test]
        fn gas_cost_always_at_least_one(
            code in proptest::collection::vec(any::<u8>(), 1..64),
        ) {
            let mut bitmask = vec![0u8; code.len()];
            bitmask[0] = 1;
            let cost = gas_cost_for_block(&code, &bitmask, 0);
            prop_assert!(cost >= 1);
        }

        // The skip distance never exceeds 24, whatever the bitmask and offset.
        #[test]
        fn skip_distance_bounded(
            bitmask in proptest::collection::vec(0u8..=1, 1..64),
            pc in 0usize..63,
        ) {
            let dist = skip_distance(&bitmask, pc);
            prop_assert!(dist <= 24);
        }

        // A freshly reset unit pool can serve any single request.
        #[test]
        fn reset_satisfies_all_unit_types(choice in 0u8..6) {
            let req = unit_request(choice);
            prop_assert!(ExecUnits::RESET.can_satisfy(req));
        }

        // Subtracting any single request from the reset pool must not underflow.
        #[test]
        fn sub_preserves_non_negative(choice in 0u8..6) {
            let req = unit_request(choice);
            let remaining = ExecUnits::RESET.sub(req);
            prop_assert!(remaining.can_satisfy(ExecUnits::NONE));
        }

        // Costing the same block twice gives the same answer.
        #[test]
        fn gas_cost_deterministic(
            code in proptest::collection::vec(any::<u8>(), 1..32),
        ) {
            let mut bitmask = vec![0u8; code.len()];
            bitmask[0] = 1;
            let cost1 = gas_cost_for_block(&code, &bitmask, 0);
            let cost2 = gas_cost_for_block(&code, &bitmask, 0);
            prop_assert_eq!(cost1, cost2);
        }

        // Every 4-bit register number maps to exactly one mask bit.
        #[test]
        fn reg_bit_is_power_of_two(r in 0u8..16) {
            let bit = reg_bit(r);
            prop_assert!(bit.is_power_of_two());
        }

        // Register numbers 13..=15 share the mask bit of register 12.
        #[test]
        fn reg_bit_clamps_high_registers(r in 13u8..=15) {
            prop_assert_eq!(reg_bit(r), reg_bit(12));
        }

        // A RegSet contains exactly the registers it was built from.
        #[test]
        fn regset_contains_matches_construction(a in 0u8..13, b in 0u8..13) {
            prop_assume!(a != b);
            let set = RegSet::two(a, b);
            prop_assert!(set.contains(a));
            prop_assert!(set.contains(b));

            let single = RegSet::one(a);
            prop_assert!(single.contains(a));
            prop_assert!(!single.contains(b) || a == b);
        }
    }
}