Skip to main content

javm_transpiler/
linker.rs

//! Linker-based RISC-V ELF to PVM transpilation.
//!
//! Unlike the basic `transpile_elf`, this module processes ELF relocations
//! to correctly handle data references in code. This is required for
//! real-world programs (like k256 crypto) that reference .rodata constants.
//!
//! Approach:
//! 1. Parse ELF sections and relocations
//! 2. Compute PVM memory layout (stack, ro_data, rw_data addresses)
//! 3. Build a relocation map: code_offset → resolved_address
//! 4. Translate RISC-V instructions, using relocation info to replace
//!    AUIPC+LO12 pairs with direct load_imm of the final PVM address
//! 5. Emit a v3 `javm_cap::image::Image` with the code sub-blob,
//!    declared endpoints, and standard kernel-ABI slot conventions.
15
16use crate::TranspileError;
17use crate::emitter;
18use crate::layout::{
19    HEAP_CAP_INDEX, PVM_PAGE_SIZE, ProgramLayout, RO_CAP_INDEX, RW_CAP_INDEX, STACK_CAP_INDEX,
20};
21use crate::riscv::TranslationContext;
22use javm_cap::SlotIdx;
23use javm_cap::abi::{BARE_GAS_SLOT, BARE_QUOTA_SLOT, BARE_YIELD_CATCHER_SLOT};
24use javm_cap::image::{EndpointDef, Image, InitialDataCap, MemoryMapping, PinnedCap};
25use javm_cap::slot::SlotPath;
26use std::collections::{BTreeMap, HashMap};
27
/// PVM register that carries the RISC-V stack pointer (φ[1]).
///
/// `link_elf` seeds this register with the layout's `stack_top` in every
/// endpoint's `initial_regs`.
const SP_REG: u8 = 1;
30
/// The subset of RISC-V ELF relocation kinds the linker acts on.
///
/// Each variant's doc gives the psABI name and its raw number as decoded by
/// [`RelocType::from_raw`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RelocType {
    /// `R_RISCV_32` (1): absolute 32-bit address.
    Abs32,
    /// `R_RISCV_64` (2): absolute 64-bit address.
    Abs64,
    /// `R_RISCV_CALL_PLT` (19): function call through an AUIPC+JALR pair.
    CallPlt,
    /// `R_RISCV_PCREL_HI20` (23): upper 20 bits of a PC-relative address (on the AUIPC).
    PcrelHi20,
    /// `R_RISCV_PCREL_LO12_I` (24): lower 12 bits on an I-type instruction (load/addi).
    PcrelLo12I,
    /// `R_RISCV_PCREL_LO12_S` (25): lower 12 bits on an S-type instruction (store).
    PcrelLo12S,
    /// `R_RISCV_ADD32` (35): 32-bit add; pairs with `Sub32` in relative jump tables.
    Add32,
    /// `R_RISCV_SUB32` (39): 32-bit subtract; pairs with `Add32` in relative jump tables.
    Sub32,
}

impl RelocType {
    /// Decode a raw relocation type number (the low 32 bits of `r_info`).
    ///
    /// Returns `None` for every relocation kind the linker does not handle.
    fn from_raw(r: u32) -> Option<Self> {
        let kind = match r {
            1 => Self::Abs32,
            2 => Self::Abs64,
            19 => Self::CallPlt,
            23 => Self::PcrelHi20,
            24 => Self::PcrelLo12I,
            25 => Self::PcrelLo12S,
            35 => Self::Add32,
            39 => Self::Sub32,
            _ => return None,
        };
        Some(kind)
    }
}
67
/// Everything `parse_linked_elf` extracts from the input ELF for linking.
///
/// How the fields are consumed:
/// - Code translation (`translate_section_linked`) works from
///   `code_sections`, `code_ranges`, `entry_vaddr`, `hi20_targets`,
///   `lo12_targets` and `call_targets`.
/// - The v3 Image data fields (`memory_mappings`, `pinned_slots`,
///   `initial_slots`) are built by [`link_elf`] and
///   [`rewrite_data_code_ptrs`] from `ro_data`, `rw_data`, `stack_size`,
///   `heap_pages`, `abs_code_ptrs` and `sub32_relocs`.
struct LinkedElf {
    /// ELF class; the parser only ever produces `true` (ELF64).
    is_64bit: bool,
    /// One entry per executable section: `(file_offset, vaddr, bytes)`.
    code_sections: Vec<(u64, u64, Vec<u8>)>,
    /// Read-only data blob; each section sits at `section_addr - stack_size`.
    ro_data: Vec<u8>,
    /// PVM base address of `ro_data` (equal to `stack_size`).
    _ro_base: u64,
    /// Read-write data blob; `.bss`-style (NOBITS) ranges are left zeroed.
    rw_data: Vec<u8>,
    /// PVM address just past the page-rounded read-only blob.
    _rw_base: u64,
    /// Stack size in bytes. It doubles as the read-only base address, so the
    /// RO data keeps the address the ELF was linked at.
    stack_size: u32,
    /// Number of heap pages to reserve.
    heap_pages: u32,
    /// `PCREL_HI20`: vaddr of the AUIPC → resolved target address. The AUIPC
    /// itself is expected to become a `load_imm` of this address.
    hi20_targets: HashMap<u64, u64>,
    /// `PCREL_LO12`: instruction vaddr → the address resolved by its paired
    /// HI20. These instructions reuse the address the AUIPC already loaded.
    lo12_targets: HashMap<u64, u64>,
    /// `CALL_PLT`: vaddr of the AUIPC → RISC-V vaddr of the callee.
    call_targets: HashMap<u64, u64>,
    /// Code pointers stored in data: `(data_vaddr, target_code_vaddr, entry_size)`,
    /// where `entry_size` is 4 for 32-bit entries and 8 for 64-bit entries.
    abs_code_ptrs: Vec<(u64, u64, u8)>,
    /// `SUB32` relocations: `(data_vaddr, subtracted_addr)`. In LLVM relative
    /// jump tables the stored entry is `target - subtracted_addr`, so the
    /// target can be recovered from the entry value.
    sub32_relocs: Vec<(u64, u64)>,
    /// Half-open `(start, end)` vaddr range of each code section, used to
    /// recognise code pointers.
    code_ranges: Vec<(u64, u64)>,
    /// ELF entry point (`e_entry`): the RISC-V vaddr of `_start`.
    entry_vaddr: u64,
}
112
113/// Transpile an rv64em ELF into a v3 chain [`Image`].
114///
115/// Output Image:
116/// - `code`: CODE sub-blob (jump_table + code + packed bitmask) of
117///   the translated user code.
118/// - `endpoints`: populated from the `.subsoil.endpoints` ELF
119///   section (entries emitted by `#[subsoil::endpoint(N)]`). Each
120///   descriptor's `fn_ptr` points at a per-endpoint trampoline
121///   that calls the user fn and halts. Guests must declare at
122///   least one endpoint; the transpiler errors if the section is
123///   absent or empty. Every endpoint gets `initial_regs[1] =
124///   stack_top` baked in.
125/// - `memory_mappings` + `pinned_slots` + `initial_slots`: declarative
126///   address-space layout. The transpiler emits one mapping per
127///   region (stack, ro, rw, heap) backed by a slot. ro_data lives
128///   in `pinned_slots` (RO at runtime); rw_data and the zero-filled
129///   stack/heap regions live in `initial_slots` (RW). The kernel's
130///   chain genesis installs Cap::Data's for each declared slot.
131/// - `gas_slots`, `quota_slots`, `yield_marker_slot`: standard
132///   kernel-ABI defaults from [`javm_cap::abi`].
133pub fn link_elf(elf_data: &[u8]) -> Result<Image, TranspileError> {
134    let elf = parse_linked_elf(elf_data)?;
135    let mut ctx = TranslationContext::new(elf.is_64bit);
136    ctx.code_ranges = elf.code_ranges.clone();
137
138    // Emit an unconditional jump to the ELF entry point (e_entry) so
139    // PC=0-of-user-code enters at _start rather than whatever function
140    // LLD placed first in .text. The fixup is resolved after
141    // translation in apply_fixups() (which maps RISC-V vaddrs → PVM
142    // PCs via jump_table). A gas-block boundary is required before
143    // the jump so the first instruction lives in its own basic block.
144    if elf.entry_vaddr != 0 {
145        ctx.emit_jump(elf.entry_vaddr);
146    }
147
148    for (_file_off, vaddr, data) in &elf.code_sections {
149        translate_section_linked(&mut ctx, data, *vaddr, &elf)?;
150    }
151    ctx.apply_fixups();
152
153    // The ro/rw byte vectors may contain RISC-V code-pointer bytes
154    // (LLVM jump tables, vtables). Rewrite them to PVM PCs before
155    // they get sealed into pinned_slots / initial_slots.
156    let mut ro_data = elf.ro_data.clone();
157    let mut rw_data = elf.rw_data.clone();
158    rewrite_data_code_ptrs(&elf, &mut ctx, &mut ro_data, &mut rw_data);
159
160    crate::peephole_fuse_load_imm_alu(&mut ctx.code, &mut ctx.bitmask, &ctx.jump_table);
161    crate::peephole_fuse_load_imm_memory(&mut ctx.code, &mut ctx.bitmask, &ctx.jump_table);
162    crate::peephole_eliminate_dead_load_imm(&mut ctx.code, &mut ctx.bitmask, &ctx.jump_table);
163    crate::ensure_branch_targets_are_block_starts(
164        &mut ctx.code,
165        &mut ctx.bitmask,
166        &mut ctx.jump_table,
167    );
168
169    let packed_bitmask = emitter::pack_bitmask(&ctx.bitmask);
170    let mut endpoints = read_subsoil_endpoints(elf_data, &elf, &ctx)?;
171
172    // Compute the data-region layout. The transpiler emits one
173    // MemoryMapping per region (stack/ro/rw/heap) backed by a slot
174    // declared in pinned_slots (ro) or initial_slots (rw and
175    // zero-filled).
176    let stack_pages = elf.stack_size / PVM_PAGE_SIZE;
177    let ro_pages = (ro_data.len() as u32).div_ceil(PVM_PAGE_SIZE);
178    let rw_pages = (rw_data.len() as u32).div_ceil(PVM_PAGE_SIZE);
179    let layout = ProgramLayout::compute(stack_pages, ro_pages, rw_pages, elf.heap_pages);
180    let stack_top = layout.stack_top();
181    for def in endpoints.values_mut() {
182        def.initial_regs.insert(SP_REG, stack_top);
183    }
184
185    let mut memory_mappings: Vec<MemoryMapping> = Vec::new();
186    let mut pinned_slots: BTreeMap<SlotIdx, PinnedCap> = BTreeMap::new();
187    let mut initial_slots: BTreeMap<SlotIdx, InitialDataCap> = BTreeMap::new();
188
189    let page_bytes = u64::from(PVM_PAGE_SIZE);
190
191    // Stack: ephemeral-like zero-filled DataCap at the stack slot.
192    let stack_slot = SlotIdx(u32::from(STACK_CAP_INDEX));
193    let stack_size = u64::from(layout.stack.page_count) * page_bytes;
194    memory_mappings.push(MemoryMapping {
195        start: u64::from(layout.stack.base_page) * page_bytes,
196        size: stack_size,
197        source: SlotPath::root(stack_slot),
198    });
199    initial_slots.insert(
200        stack_slot,
201        InitialDataCap {
202            content: Vec::new(),
203            size: stack_size,
204        },
205    );
206
207    // ro_data: pinned (read-only) with bytes baked into the Image.
208    if let Some(ro) = &layout.ro {
209        let ro_slot = SlotIdx(u32::from(RO_CAP_INDEX));
210        let size = u64::from(ro.page_count) * page_bytes;
211        memory_mappings.push(MemoryMapping {
212            start: u64::from(ro.base_page) * page_bytes,
213            size,
214            source: SlotPath::root(ro_slot),
215        });
216        pinned_slots.insert(
217            ro_slot,
218            PinnedCap::Data {
219                content: ro_data.clone(),
220                size,
221            },
222        );
223    }
224
225    // rw_data: non-pinned, initial bytes baked into the Image.
226    if let Some(rw) = &layout.rw {
227        let rw_slot = SlotIdx(u32::from(RW_CAP_INDEX));
228        let size = u64::from(rw.page_count) * page_bytes;
229        memory_mappings.push(MemoryMapping {
230            start: u64::from(rw.base_page) * page_bytes,
231            size,
232            source: SlotPath::root(rw_slot),
233        });
234        initial_slots.insert(
235            rw_slot,
236            InitialDataCap {
237                content: rw_data.clone(),
238                size,
239            },
240        );
241    }
242
243    // Heap: zero-filled DataCap at the heap slot.
244    if let Some(heap) = &layout.heap {
245        let heap_slot = SlotIdx(u32::from(HEAP_CAP_INDEX));
246        let size = u64::from(heap.page_count) * page_bytes;
247        memory_mappings.push(MemoryMapping {
248            start: u64::from(heap.base_page) * page_bytes,
249            size,
250            source: SlotPath::root(heap_slot),
251        });
252        initial_slots.insert(
253            heap_slot,
254            InitialDataCap {
255                content: Vec::new(),
256                size,
257            },
258        );
259    }
260
261    Ok(Image {
262        code: ctx.code.clone(),
263        packed_bitmask,
264        jump_table: ctx.jump_table.clone(),
265        endpoints,
266        memory_mappings,
267        gas_slots: vec![BARE_GAS_SLOT],
268        quota_slots: vec![BARE_QUOTA_SLOT],
269        pinned_slots,
270        initial_slots,
271        yield_marker_slot: Some(BARE_YIELD_CATCHER_SLOT),
272    })
273}
274
275/// Read the `.subsoil.endpoints` ELF section (emitted by
276/// `subsoil_derive::endpoint`) and resolve each descriptor's RISC-V
277/// fn_ptr to a PVM PC via the transpiler's address map.
278///
279/// Each descriptor is 16 bytes, `#[repr(C)]`:
280///   `fn_ptr: u64 LE | index: u8 | arg_registers: u8 | arg_cnode_size: u8 | _pad[5]`
281///
282/// `fn_ptr` points at a per-endpoint trampoline (a `call user_fn;
283/// ecall HALT` wrapper) emitted by `#[subsoil::endpoint(N)]`, not
284/// at the user fn itself. Guests must declare at least one
285/// endpoint; an absent or empty section is a hard error.
286fn read_subsoil_endpoints(
287    elf_data: &[u8],
288    elf: &LinkedElf,
289    ctx: &TranslationContext,
290) -> Result<BTreeMap<u8, EndpointDef>, TranspileError> {
291    let mut endpoints = BTreeMap::new();
292    // LLD emits one `.subsoil.endpoints` *input* section per
293    // `#[link_section]` static, and does not coalesce them into a
294    // single output section header — so the ELF can carry many
295    // section headers with this exact name, laid out contiguously
296    // by address. Concatenate them in address order so the descriptor
297    // array reads correctly regardless of header count.
298    let section_chunks = find_all_section_bytes(elf_data, ".subsoil.endpoints")?;
299    const DESCRIPTOR_SIZE: usize = 16;
300    for section_bytes in &section_chunks {
301        if section_bytes.len() % DESCRIPTOR_SIZE != 0 {
302            return Err(TranspileError::InvalidSection(format!(
303                ".subsoil.endpoints size {} is not a multiple of {}",
304                section_bytes.len(),
305                DESCRIPTOR_SIZE
306            )));
307        }
308        for chunk in section_bytes.chunks(DESCRIPTOR_SIZE) {
309            let fn_ptr = u64::from_le_bytes(chunk[0..8].try_into().unwrap());
310            let index = chunk[8];
311            let arg_registers = chunk[9];
312            let arg_cnode_size = chunk[10];
313            let pvm_pc = ctx.address_map.get(&fn_ptr).copied().ok_or_else(|| {
314                TranspileError::InvalidSection(format!(
315                    "subsoil endpoint {} fn_ptr {:#x} has no PVM address mapping",
316                    index, fn_ptr
317                ))
318            })?;
319            if endpoints
320                .insert(
321                    index,
322                    EndpointDef {
323                        entry_pc: pvm_pc as u64,
324                        arg_registers,
325                        arg_cnode_size,
326                        initial_regs: BTreeMap::new(),
327                    },
328                )
329                .is_some()
330            {
331                return Err(TranspileError::InvalidSection(format!(
332                    "duplicate #[subsoil::endpoint({})] declaration",
333                    index
334                )));
335            }
336        }
337    }
338    if endpoints.is_empty() {
339        return Err(TranspileError::InvalidSection(
340            ".subsoil.endpoints section is absent or empty: \
341             the guest must declare at least one #[subsoil::endpoint(N)]"
342                .into(),
343        ));
344    }
345    let _ = elf; // currently unused; reserved for future symbol-table cross-checks
346    Ok(endpoints)
347}
348
349/// Locate every section header with the given name and return
350/// their bytes, ordered by ELF virtual address. Multiple headers
351/// can share a name when LLD doesn't coalesce input sections
352/// (e.g., `#[link_section]` statics from a single rlib).
353fn find_all_section_bytes<'a>(
354    elf_data: &'a [u8],
355    section_name: &str,
356) -> Result<Vec<&'a [u8]>, TranspileError> {
357    if elf_data.len() < 64 || elf_data[0..4] != [0x7F, b'E', b'L', b'F'] {
358        return Err(TranspileError::ElfParse("not an ELF file".into()));
359    }
360    if elf_data[4] != 2 {
361        return Err(TranspileError::ElfParse("only 64-bit ELF supported".into()));
362    }
363    let e_shoff = u64::from_le_bytes(elf_data[40..48].try_into().unwrap()) as usize;
364    let e_shentsize = u16::from_le_bytes(elf_data[58..60].try_into().unwrap()) as usize;
365    let e_shnum = u16::from_le_bytes(elf_data[60..62].try_into().unwrap()) as usize;
366    let e_shstrndx = u16::from_le_bytes(elf_data[62..64].try_into().unwrap()) as usize;
367
368    let strtab = {
369        let sh = e_shoff + e_shstrndx * e_shentsize;
370        let off = u64::from_le_bytes(elf_data[sh + 24..sh + 32].try_into().unwrap()) as usize;
371        let sz = u64::from_le_bytes(elf_data[sh + 32..sh + 40].try_into().unwrap()) as usize;
372        &elf_data[off..off + sz]
373    };
374
375    let mut hits: Vec<(u64, &[u8])> = Vec::new();
376    for i in 0..e_shnum {
377        let sh = e_shoff + i * e_shentsize;
378        if sh + e_shentsize > elf_data.len() {
379            break;
380        }
381        let name_off = u32::from_le_bytes(elf_data[sh..sh + 4].try_into().unwrap()) as usize;
382        let addr = u64::from_le_bytes(elf_data[sh + 16..sh + 24].try_into().unwrap());
383        let file_off = u64::from_le_bytes(elf_data[sh + 24..sh + 32].try_into().unwrap()) as usize;
384        let size = u64::from_le_bytes(elf_data[sh + 32..sh + 40].try_into().unwrap()) as usize;
385        let name = if name_off < strtab.len() {
386            let end = strtab[name_off..].iter().position(|&b| b == 0).unwrap_or(0);
387            std::str::from_utf8(&strtab[name_off..name_off + end]).unwrap_or("")
388        } else {
389            ""
390        };
391        if name == section_name && file_off + size <= elf_data.len() {
392            hits.push((addr, &elf_data[file_off..file_off + size]));
393        }
394    }
395    hits.sort_by_key(|&(addr, _)| addr);
396    Ok(hits.into_iter().map(|(_, bytes)| bytes).collect())
397}
398
399/// Parse ELF with full relocation info.
400fn parse_linked_elf(data: &[u8]) -> Result<LinkedElf, TranspileError> {
401    if data.len() < 64 || data[0..4] != [0x7F, b'E', b'L', b'F'] {
402        return Err(TranspileError::ElfParse("not an ELF file".into()));
403    }
404
405    let is_64bit = match data[4] {
406        1 => false,
407        2 => true,
408        _ => return Err(TranspileError::ElfParse("unsupported ELF class".into())),
409    };
410
411    if !is_64bit {
412        return Err(TranspileError::ElfParse(
413            "linker requires 64-bit ELF (rv64em)".into(),
414        ));
415    }
416
417    // ELF64 header fields
418    let e_entry = u64::from_le_bytes(data[24..32].try_into().unwrap());
419    let e_shoff = u64::from_le_bytes(data[40..48].try_into().unwrap()) as usize;
420    let e_shentsize = u16::from_le_bytes(data[58..60].try_into().unwrap()) as usize;
421    let e_shnum = u16::from_le_bytes(data[60..62].try_into().unwrap()) as usize;
422    let e_shstrndx = u16::from_le_bytes(data[62..64].try_into().unwrap()) as usize;
423
424    // Section name string table
425    let strtab = {
426        let sh = e_shoff + e_shstrndx * e_shentsize;
427        let off = u64::from_le_bytes(data[sh + 24..sh + 32].try_into().unwrap()) as usize;
428        let sz = u64::from_le_bytes(data[sh + 32..sh + 40].try_into().unwrap()) as usize;
429        &data[off..off + sz]
430    };
431
432    let get_name = |name_off: usize| -> &str {
433        if name_off >= strtab.len() {
434            return "";
435        }
436        let end = strtab[name_off..].iter().position(|&b| b == 0).unwrap_or(0);
437        std::str::from_utf8(&strtab[name_off..name_off + end]).unwrap_or("")
438    };
439
440    // First pass: collect section metadata
441    struct SectionInfo {
442        name_off: usize,
443        sh_type: u32,
444        flags: u64,
445        addr: u64,
446        file_off: usize,
447        size: usize,
448        link: usize,
449        _info: usize,
450    }
451
452    let mut sections = Vec::with_capacity(e_shnum);
453    for i in 0..e_shnum {
454        let sh = e_shoff + i * e_shentsize;
455        if sh + e_shentsize > data.len() {
456            break;
457        }
458        sections.push(SectionInfo {
459            name_off: u32::from_le_bytes(data[sh..sh + 4].try_into().unwrap()) as usize,
460            sh_type: u32::from_le_bytes(data[sh + 4..sh + 8].try_into().unwrap()),
461            flags: u64::from_le_bytes(data[sh + 8..sh + 16].try_into().unwrap()),
462            addr: u64::from_le_bytes(data[sh + 16..sh + 24].try_into().unwrap()),
463            file_off: u64::from_le_bytes(data[sh + 24..sh + 32].try_into().unwrap()) as usize,
464            size: u64::from_le_bytes(data[sh + 32..sh + 40].try_into().unwrap()) as usize,
465            link: u32::from_le_bytes(data[sh + 40..sh + 44].try_into().unwrap()) as usize,
466            _info: u32::from_le_bytes(data[sh + 44..sh + 48].try_into().unwrap()) as usize,
467        });
468    }
469
470    // Collect code sections, ro sections, rw sections
471    let mut code_sections = Vec::new();
472    let mut ro_sections: Vec<(u64, usize, Vec<u8>)> = Vec::new();
473    let mut rw_sections: Vec<(u64, usize, Option<Vec<u8>>)> = Vec::new();
474    let mut rela_section_indices = Vec::new();
475    let mut symtab_idx = None;
476
477    for (i, s) in sections.iter().enumerate() {
478        let name = get_name(s.name_off);
479        let is_alloc = s.flags & 2 != 0;
480        let is_exec = s.flags & 4 != 0;
481        let is_write = s.flags & 1 != 0;
482
483        if s.sh_type == 2 {
484            // SYMTAB
485            symtab_idx = Some(i);
486        }
487        if s.sh_type == 4 {
488            // RELA
489            rela_section_indices.push(i);
490        }
491        if !is_alloc || s.sh_type == 0 {
492            continue;
493        }
494
495        if is_exec && s.file_off + s.size <= data.len() {
496            code_sections.push((
497                s.file_off as u64,
498                s.addr,
499                data[s.file_off..s.file_off + s.size].to_vec(),
500            ));
501        } else if !is_exec
502            && (name.starts_with(".rodata")
503                || name == ".srodata"
504                || name.starts_with(".data.rel.ro"))
505        {
506            if s.file_off + s.size <= data.len() {
507                ro_sections.push((
508                    s.addr,
509                    s.size,
510                    data[s.file_off..s.file_off + s.size].to_vec(),
511                ));
512            }
513        } else if is_write {
514            if s.sh_type == 8 {
515                // NOBITS (.bss)
516                rw_sections.push((s.addr, s.size, None));
517            } else if s.file_off + s.size <= data.len() {
518                rw_sections.push((
519                    s.addr,
520                    s.size,
521                    Some(data[s.file_off..s.file_off + s.size].to_vec()),
522                ));
523            }
524        }
525    }
526
527    // Parse symbol table
528    let mut symbols_by_idx: Vec<(String, u64)> = Vec::new();
529    if let Some(si) = symtab_idx {
530        let s = &sections[si];
531        // Get associated string table
532        let sym_strtab = {
533            let ss = &sections[s.link];
534            &data[ss.file_off..ss.file_off + ss.size]
535        };
536        // ELF64 symbol = 24 bytes
537        let count = s.size / 24;
538        for j in 0..count {
539            let off = s.file_off + j * 24;
540            if off + 24 > data.len() {
541                break;
542            }
543            let st_name = u32::from_le_bytes(data[off..off + 4].try_into().unwrap()) as usize;
544            let st_value = u64::from_le_bytes(data[off + 8..off + 16].try_into().unwrap());
545
546            let name = {
547                if st_name < sym_strtab.len() {
548                    let end = sym_strtab[st_name..]
549                        .iter()
550                        .position(|&b| b == 0)
551                        .unwrap_or(0);
552                    std::str::from_utf8(&sym_strtab[st_name..st_name + end]).unwrap_or("")
553                } else {
554                    ""
555                }
556            };
557
558            symbols_by_idx.push((name.to_string(), st_value));
559        }
560    }
561
562    // Compute PVM memory layout
563    // PVM linear memory: [stack: 0..s) [ro: s..s+|o|) [rw: s+P(|o|)..] [heap...]
564    // We set stack_size = minimum power-of-2 page boundary that contains all ro section addrs.
565    let ro_min = ro_sections.iter().map(|(a, _, _)| *a).min().unwrap_or(0);
566    let ro_max = ro_sections
567        .iter()
568        .map(|(a, sz, _)| *a + *sz as u64)
569        .max()
570        .unwrap_or(0);
571
572    // Round ro_min down to page boundary for stack_size.
573    // Minimum 4 pages (16KB) so the stack is usable even without rodata.
574    let page_size: u64 = 4096;
575    let stack_size = if ro_min > 0 {
576        (ro_min / page_size) * page_size
577    } else {
578        4 * page_size
579    };
580
581    // Build ro_data blob: section data placed at (section_addr - stack_size) offset
582    let ro_blob_size = if ro_max > stack_size {
583        (ro_max - stack_size) as usize
584    } else {
585        0
586    };
587    let mut ro_data = vec![0u8; ro_blob_size];
588    for (addr, sz, d) in &ro_sections {
589        let off = (*addr - stack_size) as usize;
590        if off + sz <= ro_data.len() {
591            ro_data[off..off + sz].copy_from_slice(d);
592        }
593    }
594
595    // RW data: placed after ro_data (with page rounding)
596    let ro_pages = ro_data.len().div_ceil(page_size as usize);
597    let rw_pvm_base = stack_size + (ro_pages as u64 * page_size);
598    let mut rw_data = Vec::new();
599    if !rw_sections.is_empty() {
600        let rw_min = rw_sections.iter().map(|(a, _, _)| *a).min().unwrap();
601        let rw_max = rw_sections
602            .iter()
603            .map(|(a, sz, _)| *a + *sz as u64)
604            .max()
605            .unwrap();
606        let rw_blob_size = (rw_max - rw_pvm_base.min(rw_min)) as usize;
607        rw_data = vec![0u8; rw_blob_size];
608        for (addr, sz, d) in &rw_sections {
609            let off = (*addr - rw_pvm_base.min(rw_min)) as usize;
610            if let Some(d) = d
611                && off + sz <= rw_data.len()
612            {
613                rw_data[off..off + sz].copy_from_slice(d);
614            }
615        }
616    }
617
618    // Parse relocations in two passes:
619    // Pass 1: collect HI20 targets and CALL_PLT targets
620    // Pass 2: resolve LO12 by looking up their paired HI20
621    let mut hi20_targets: HashMap<u64, u64> = HashMap::new();
622    let mut lo12_targets: HashMap<u64, u64> = HashMap::new();
623    let mut call_targets: HashMap<u64, u64> = HashMap::new();
624
625    // Temporary: collect LO12 entries for pass 2
626    let mut lo12_entries: Vec<(u64, u64)> = Vec::new(); // (lo12_addr, hi20_addr)
627    let mut abs64_relocs: Vec<(u64, u64, u8)> = Vec::new(); // (offset, target, entry_size)
628    let mut sub32_relocs: Vec<(u64, u64)> = Vec::new();
629    // Code address ranges for detecting code pointers
630    let code_ranges: Vec<(u64, u64)> = code_sections
631        .iter()
632        .map(|(_, vaddr, data)| (*vaddr, *vaddr + data.len() as u64))
633        .collect();
634
635    for &ri in &rela_section_indices {
636        let rs = &sections[ri];
637        let count = rs.size / 24;
638        for j in 0..count {
639            let off = rs.file_off + j * 24;
640            if off + 24 > data.len() {
641                break;
642            }
643            let r_offset = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
644            let r_info = u64::from_le_bytes(data[off + 8..off + 16].try_into().unwrap());
645            let r_addend = i64::from_le_bytes(data[off + 16..off + 24].try_into().unwrap());
646            let r_type = (r_info & 0xFFFFFFFF) as u32;
647            let r_sym = (r_info >> 32) as usize;
648
649            let rtype = match RelocType::from_raw(r_type) {
650                Some(t) => t,
651                None => continue,
652            };
653
654            let sym_value = if r_sym < symbols_by_idx.len() {
655                symbols_by_idx[r_sym].1
656            } else {
657                0
658            };
659
660            let target_addr = (sym_value as i64 + r_addend) as u64;
661
662            match rtype {
663                RelocType::Abs32 => {
664                    let is_code_ptr = code_ranges
665                        .iter()
666                        .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
667                    if is_code_ptr {
668                        abs64_relocs.push((r_offset, target_addr, 4));
669                    }
670                }
671                RelocType::Abs64 => {
672                    let is_code_ptr = code_ranges
673                        .iter()
674                        .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
675                    if is_code_ptr {
676                        abs64_relocs.push((r_offset, target_addr, 8));
677                    }
678                }
679                RelocType::Add32 => {
680                    let is_code_ptr = code_ranges
681                        .iter()
682                        .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
683                    if is_code_ptr {
684                        abs64_relocs.push((r_offset, target_addr, 4));
685                    }
686                }
687                RelocType::Sub32 => {
688                    // R_RISCV_SUB32: the subtracted address (typically table base).
689                    sub32_relocs.push((r_offset, target_addr));
690                }
691                RelocType::CallPlt => {
692                    call_targets.insert(r_offset, target_addr);
693                }
694                RelocType::PcrelHi20 => {
695                    // target_addr is the resolved data/function address
696                    hi20_targets.insert(r_offset, target_addr);
697                }
698                RelocType::PcrelLo12I | RelocType::PcrelLo12S => {
699                    // sym_value is the address of the paired HI20 instruction.
700                    // r_offset is the address of this LO12 instruction.
701                    lo12_entries.push((r_offset, sym_value));
702                }
703            }
704        }
705    }
706
707    // Pass 2: resolve LO12 targets by looking up paired HI20
708    for (lo12_addr, hi20_addr) in lo12_entries {
709        if let Some(&data_addr) = hi20_targets.get(&hi20_addr) {
710            lo12_targets.insert(lo12_addr, data_addr);
711        }
712    }
713
714    let heap_pages = 16u32; // 64KB heap
715
716    Ok(LinkedElf {
717        is_64bit,
718        code_sections,
719        ro_data,
720        _ro_base: stack_size,
721        rw_data,
722        _rw_base: rw_pvm_base,
723        stack_size: stack_size as u32,
724        heap_pages,
725        hi20_targets,
726        lo12_targets,
727        call_targets,
728        abs_code_ptrs: abs64_relocs,
729        sub32_relocs,
730        code_ranges,
731        entry_vaddr: e_entry,
732    })
733}
734
735/// Rewrite code pointers in data sections (LLVM switch/jump tables, vtables).
736///
737/// Detects code pointers via:
738/// 1. R_RISCV_32/64 absolute relocations targeting code sections
739/// 2. R_RISCV_SUB32 relocations (relative jump table entries: value = target - table_base)
740/// 3. Heuristic scan for 8-byte values in rodata that match code addresses
741///
742/// Creates PVM jump table entries for each target and rewrites the data
743/// so that the loaded values are valid PVM djump addresses.
744fn rewrite_data_code_ptrs(
745    elf: &LinkedElf,
746    ctx: &mut TranslationContext,
747    ro_data: &mut [u8],
748    _rw_data: &mut [u8],
749) {
750    let ro_base = elf.stack_size as u64;
751    let is_code_addr = |addr: u64| -> bool {
752        elf.code_ranges
753            .iter()
754            .any(|(lo, hi)| addr >= *lo && addr < *hi)
755    };
756
757    struct Entry {
758        data_vaddr: u64,
759        rv_target: u64,
760        size: u8,
761        table_base_rv: Option<u64>,
762    }
763    let mut entries: Vec<Entry> = Vec::new();
764
765    // From absolute relocations (R_RISCV_32/64/ADD32).
766    // If a matching SUB32 exists at the same offset, this is a relative entry
767    // (ADD32/SUB32 pair for jump tables). Use the SUB32 target as table base.
768    for &(vaddr, target, size) in &elf.abs_code_ptrs {
769        let table_base = elf
770            .sub32_relocs
771            .iter()
772            .find(|(v, _)| *v == vaddr)
773            .map(|(_, base)| *base);
774        entries.push(Entry {
775            data_vaddr: vaddr,
776            rv_target: target,
777            size,
778            table_base_rv: table_base,
779        });
780    }
781
782    // SUB32 entries without matching ADD32 (shouldn't happen, but handle gracefully).
783    for &(data_vaddr, base_addr) in &elf.sub32_relocs {
784        if entries.iter().any(|e| e.data_vaddr == data_vaddr) {
785            continue; // Already handled via ADD32 pairing above
786        }
787        if data_vaddr >= ro_base {
788            let off = (data_vaddr - ro_base) as usize;
789            if off + 4 <= ro_data.len() {
790                let val = i32::from_le_bytes(ro_data[off..off + 4].try_into().unwrap());
791                let target = (base_addr as i64 + val as i64) as u64;
792                if is_code_addr(target) {
793                    entries.push(Entry {
794                        data_vaddr,
795                        rv_target: target,
796                        size: 4,
797                        table_base_rv: Some(base_addr),
798                    });
799                }
800            }
801        }
802    }
803
804    // Heuristic: 8-byte values in rodata that are code addresses
805    {
806        let mut off = 0;
807        while off + 8 <= ro_data.len() {
808            let val = u64::from_le_bytes(ro_data[off..off + 8].try_into().unwrap());
809            if is_code_addr(val) {
810                let vaddr = ro_base + off as u64;
811                if !entries.iter().any(|e| e.data_vaddr == vaddr) {
812                    entries.push(Entry {
813                        data_vaddr: vaddr,
814                        rv_target: val,
815                        size: 8,
816                        table_base_rv: None,
817                    });
818                }
819            }
820            off += 8;
821        }
822    }
823
824    if entries.is_empty() {
825        return;
826    }
827
828    let targets: std::collections::HashSet<u64> = entries.iter().map(|e| e.rv_target).collect();
829    let rv_to_jt = ctx.build_function_pointer_map(&targets);
830
831    for entry in &entries {
832        if let Some(&jt_addr) = rv_to_jt.get(&entry.rv_target)
833            && entry.data_vaddr >= ro_base
834            && (entry.data_vaddr - ro_base) as usize + entry.size as usize <= ro_data.len()
835        {
836            let off = (entry.data_vaddr - ro_base) as usize;
837            match (entry.size, entry.table_base_rv) {
838                (8, _) => {
839                    ro_data[off..off + 8].copy_from_slice(&(jt_addr as u64).to_le_bytes());
840                }
841                (4, None) => {
842                    ro_data[off..off + 4].copy_from_slice(&jt_addr.to_le_bytes());
843                }
844                (4, Some(rv_base)) => {
845                    // Relative entry: code does `lw off, table(idx); add target, off, base; jr target`.
846                    // base register holds the PVM mapping of rv_base (from load_imm).
847                    // new_val + pvm_base = jt_addr → new_val = jt_addr - pvm_base.
848                    let pvm_base = ctx
849                        .address_map
850                        .get(&rv_base)
851                        .copied()
852                        .unwrap_or(rv_base as u32);
853                    let new_val = (jt_addr as i64 - pvm_base as i64) as i32;
854                    ro_data[off..off + 4].copy_from_slice(&new_val.to_le_bytes());
855                }
856                _ => {}
857            }
858        }
859    }
860}
861
862fn translate_section_linked(
863    ctx: &mut TranslationContext,
864    data: &[u8],
865    base_addr: u64,
866    elf: &LinkedElf,
867) -> Result<(), TranspileError> {
868    let mut offset = 0;
869    while offset < data.len() {
870        let rv_addr = base_addr + offset as u64;
871        ctx.address_map.insert(rv_addr, ctx.code.len() as u32);
872
873        if offset + 4 > data.len() {
874            break;
875        }
876
877        let inst = u32::from_le_bytes([
878            data[offset],
879            data[offset + 1],
880            data[offset + 2],
881            data[offset + 3],
882        ]);
883
884        // Skip non-instruction bytes
885        if inst & 0x3 != 0x3 {
886            // Compressed instruction — not supported for rv64em
887            return Err(TranspileError::UnsupportedInstruction {
888                offset: rv_addr as usize,
889                detail: "compressed instruction in rv64em ELF".into(),
890            });
891        }
892
893        let opcode = inst & 0x7f;
894
895        // Check for relocation overrides
896        if opcode == 0x17 {
897            // AUIPC
898            let rd = ((inst >> 7) & 0x1f) as u8;
899
900            if let Some(&target_addr) = elf.call_targets.get(&rv_addr) {
901                // CALL_PLT: AUIPC+JALR pair for function call
902                // Peek at JALR to get link register
903                if offset + 8 <= data.len() {
904                    let jalr = u32::from_le_bytes([
905                        data[offset + 4],
906                        data[offset + 5],
907                        data[offset + 6],
908                        data[offset + 7],
909                    ]);
910                    let jalr_rd = ((jalr >> 7) & 0x1f) as u8;
911                    let ret_addr = rv_addr + 8;
912
913                    // Fused load_imm_jump: set return address and jump in one instruction
914                    ctx.emit_call(jalr_rd, ret_addr, target_addr)?;
915                    // Map the JALR address too
916                    ctx.address_map.insert(rv_addr + 4, ctx.code.len() as u32);
917                    offset += 8; // skip both AUIPC and JALR
918                    continue;
919                }
920            }
921
922            if let Some(&target_addr) = elf.hi20_targets.get(&rv_addr) {
923                // PCREL_HI20: AUIPC for data reference.
924                // Peek ahead: if the next instruction is a paired LO12 ADDI (nop),
925                // skip it and set pending_load_imm to enable cascading fusion
926                // with the instruction after (load_ind, store_ind, ALU, branch).
927                let next_addr = rv_addr + 4;
928                if offset + 8 <= data.len()
929                    && let Some(&_) = elf.lo12_targets.get(&next_addr)
930                {
931                    let next_inst = u32::from_le_bytes([
932                        data[offset + 4],
933                        data[offset + 5],
934                        data[offset + 6],
935                        data[offset + 7],
936                    ]);
937                    let next_opcode = next_inst & 0x7f;
938                    let next_funct3 = (next_inst >> 12) & 0x7;
939                    let next_rd = ((next_inst >> 7) & 0x1f) as u8;
940                    let next_rs1 = ((next_inst >> 15) & 0x1f) as u8;
941
942                    if next_opcode == 0x13 && next_funct3 == 0 && next_rs1 == rd {
943                        // LO12 ADDI: address is already complete from HI20.
944                        // Emit load_imm into the ADDI's destination register
945                        // and set pending_load_imm for cascading fusion.
946                        let dest = if next_rd != 0 { next_rd } else { rd };
947                        let pos = ctx.code.len();
948                        // If target is a code address (function pointer), load
949                        // a jump table address instead of the raw RISC-V address.
950                        let load_val = if ctx.is_code_addr(target_addr) {
951                            let jt_idx = ctx.jump_table.len();
952                            ctx.jump_table.push(0);
953                            ctx.return_fixups.push((jt_idx, target_addr));
954                            ((jt_idx + 1) * 2) as i64
955                        } else {
956                            target_addr as i64
957                        };
958                        ctx.emit_load_imm(dest, load_val)?;
959                        ctx.pending_load_imm = Some((dest, load_val, pos));
960                        ctx.address_map.insert(next_addr, ctx.code.len() as u32);
961                        offset += 8; // skip both AUIPC and ADDI
962                        continue;
963                    }
964                }
965
966                // No paired LO12 ADDI next — emit load_imm with pending tracking.
967                // This enables fusion with the next load/store/ALU/branch via
968                // pending_load_imm even when the LO12 is a LOAD or STORE directly.
969                let pos = ctx.code.len();
970                // If target is a code address, use jump table address.
971                let load_val = if ctx.is_code_addr(target_addr) {
972                    let jt_idx = ctx.jump_table.len();
973                    ctx.jump_table.push(0);
974                    ctx.return_fixups.push((jt_idx, target_addr));
975                    ((jt_idx + 1) * 2) as i64
976                } else {
977                    target_addr as i64
978                };
979                ctx.emit_load_imm(rd, load_val)?;
980                ctx.pending_load_imm = Some((rd, load_val, pos));
981                offset += 4;
982                continue;
983            }
984        }
985
986        // Check if this instruction has a PCREL_LO12 relocation.
987        // If so, the rs1 register already contains the full resolved address
988        // (loaded by the paired AUIPC/HI20 above). Override immediate to 0
989        // and route through translate_load/translate_store to enable fusion
990        // with the pending_load_imm set by the HI20 handler above.
991        if let Some(&_data_addr) = elf.lo12_targets.get(&rv_addr) {
992            let rd = ((inst >> 7) & 0x1f) as u8;
993            let rs1 = ((inst >> 15) & 0x1f) as u8;
994            let funct3 = (inst >> 12) & 0x7;
995
996            if opcode == 0x13 && funct3 == 0 {
997                // ADDI rd, rs1, lo12 → address already loaded by HI20.
998                // This path is reached when the HI20 peek-ahead didn't consume
999                // this ADDI (e.g., non-adjacent HI20/LO12 pair).
1000                if rd != rs1 && rd != 0 {
1001                    let pvm_src = ctx.require_reg(rs1)?;
1002                    let pvm_dst = ctx.require_reg(rd)?;
1003                    ctx.emit_inst(100); // move_reg
1004                    ctx.emit_data(pvm_dst | (pvm_src << 4));
1005                } else {
1006                    ctx.emit_inst(1); // fallthrough
1007                }
1008                offset += 4;
1009                continue;
1010            } else if opcode == 0x03 {
1011                // LOAD rd, lo12(rs1) → route through translate_load with imm=0.
1012                // If pending_load_imm is set (from HI20), this fuses into a
1013                // direct load (load_* rd, addr) — saving one instruction.
1014                ctx.translate_load(funct3, rd, rs1, 0)?;
1015                offset += 4;
1016                continue;
1017            } else if opcode == 0x23 {
1018                // STORE rs2, lo12(rs1) → route through translate_store with imm=0.
1019                let rs2 = ((inst >> 20) & 0x1f) as u8;
1020                ctx.translate_store(funct3, rs1, rs2, 0)?;
1021                offset += 4;
1022                continue;
1023            }
1024            // Fallthrough: translate normally (shouldn't happen for well-formed code)
1025        }
1026
1027        // Normal instruction translation
1028        let consumed = ctx.translate_instruction(data, offset, base_addr)?;
1029        offset += consumed;
1030    }
1031
1032    // Flush any pending LUI/AUIPC at section boundary
1033    ctx.flush_pending()?;
1034
1035    Ok(())
1036}