1use crate::TranspileError;
8use std::collections::HashMap;
9
/// Relocation kinds the linker understands.
///
/// Each variant corresponds to one raw ELF relocation type number; the
/// numeric mapping lives in `RelocType::from_raw`. The names in parentheses
/// are the RISC-V psABI identifiers for those numbers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RelocType {
    /// Raw type 1 (`R_RISCV_32`): 32-bit absolute address.
    Abs32,
    /// Raw type 2 (`R_RISCV_64`): 64-bit absolute address.
    Abs64,
    /// Raw type 19 (`R_RISCV_CALL_PLT`): call to a symbol.
    CallPlt,
    /// Raw type 23 (`R_RISCV_PCREL_HI20`): upper part of a PC-relative address.
    PcrelHi20,
    /// Raw type 24 (`R_RISCV_PCREL_LO12_I`): lower part of a PC-relative
    /// address, I-type instruction encoding.
    PcrelLo12I,
    /// Raw type 25 (`R_RISCV_PCREL_LO12_S`): lower part of a PC-relative
    /// address, S-type instruction encoding.
    PcrelLo12S,
    /// Raw type 35 (`R_RISCV_ADD32`): 32-bit in-place addition.
    Add32,
    /// Raw type 39 (`R_RISCV_SUB32`): 32-bit in-place subtraction.
    Sub32,
}
30
31impl RelocType {
32 fn from_raw(r: u32) -> Option<Self> {
33 match r {
34 1 => Some(Self::Abs32),
35 2 => Some(Self::Abs64),
36 19 => Some(Self::CallPlt),
37 23 => Some(Self::PcrelHi20),
38 24 => Some(Self::PcrelLo12I),
39 25 => Some(Self::PcrelLo12S),
40 35 => Some(Self::Add32),
41 39 => Some(Self::Sub32),
42 _ => None,
43 }
44 }
45}
46
/// Everything `parse_linked_elf` extracts from a linked ELF image: section
/// contents, the derived memory layout, and the relocation tables.
///
/// All addresses are the virtual addresses recorded in the ELF; relocation
/// maps and lists are keyed by the relocation's `r_offset`.
#[derive(Debug, Clone)]
pub(crate) struct LinkedElf {
    /// One `(file offset, virtual address, bytes)` entry per allocated
    /// executable section, in section-header order.
    pub(crate) code_sections: Vec<(u64, u64, Vec<u8>)>,
    /// Read-only data (`.rodata*`, `.srodata`, `.data.rel.ro*`) laid out by
    /// address; index 0 corresponds to address `stack_size`, gaps are zero.
    pub(crate) ro_data: Vec<u8>,
    /// Writable data laid out by address; index 0 corresponds to `rw_base`.
    /// Sections without file contents (`SHT_NOBITS`) and gaps are zero.
    pub(crate) rw_data: Vec<u8>,
    /// Lowest read-only data address rounded down to a 4096-byte page, or
    /// four pages when there is no read-only data (or it starts at address 0).
    pub(crate) stack_size: u32,
    /// Number of heap pages; the parser currently always reports 16.
    pub(crate) heap_pages: u32,
    /// `PcrelHi20` relocation offset -> resolved target (symbol + addend).
    pub(crate) hi20_targets: HashMap<u64, u64>,
    /// `PcrelLo12I`/`PcrelLo12S` relocation offset -> target of the paired
    /// `PcrelHi20` relocation.
    pub(crate) lo12_targets: HashMap<u64, u64>,
    /// `PcrelLo12I`/`PcrelLo12S` relocation offset -> offset of the paired
    /// `PcrelHi20` relocation (the lo12 relocation's symbol value).
    pub(crate) lo12_to_hi20: HashMap<u64, u64>,
    /// `CallPlt` relocation offset -> resolved call target.
    pub(crate) call_targets: HashMap<u64, u64>,
    /// `(relocation offset, target, width in bytes)` for `Abs32`, `Abs64` and
    /// `Add32` relocations whose target lies inside a code section.
    pub(crate) abs_code_ptrs: Vec<(u64, u64, u8)>,
    /// `(relocation offset, target, width in bytes)` for `Abs32` and `Abs64`
    /// relocations whose target lies outside every code section.
    pub(crate) abs_data_ptrs: Vec<(u64, u64, u8)>,
    /// `(relocation offset, target)` for every `Sub32` relocation.
    pub(crate) sub32_relocs: Vec<(u64, u64)>,
    /// Address that `rw_data[0]` corresponds to.
    pub(crate) rw_base: u64,
    /// Half-open `[start, end)` address range of each entry in
    /// `code_sections`, in the same order.
    pub(crate) code_ranges: Vec<(u64, u64)>,
}
85
86pub(crate) fn find_all_section_bytes<'a>(
90 elf_data: &'a [u8],
91 section_name: &str,
92) -> Result<Vec<&'a [u8]>, TranspileError> {
93 if elf_data.len() < 64 || elf_data[0..4] != [0x7F, b'E', b'L', b'F'] {
94 return Err(TranspileError::ElfParse("not an ELF file".into()));
95 }
96 if elf_data[4] != 2 {
97 return Err(TranspileError::ElfParse("only 64-bit ELF supported".into()));
98 }
99 let e_shoff = u64::from_le_bytes(elf_data[40..48].try_into().unwrap()) as usize;
100 let e_shentsize = u16::from_le_bytes(elf_data[58..60].try_into().unwrap()) as usize;
101 let e_shnum = u16::from_le_bytes(elf_data[60..62].try_into().unwrap()) as usize;
102 let e_shstrndx = u16::from_le_bytes(elf_data[62..64].try_into().unwrap()) as usize;
103
104 let strtab = {
105 let sh = e_shoff + e_shstrndx * e_shentsize;
106 let off = u64::from_le_bytes(elf_data[sh + 24..sh + 32].try_into().unwrap()) as usize;
107 let sz = u64::from_le_bytes(elf_data[sh + 32..sh + 40].try_into().unwrap()) as usize;
108 &elf_data[off..off + sz]
109 };
110
111 let mut hits: Vec<(u64, &[u8])> = Vec::new();
112 for i in 0..e_shnum {
113 let sh = e_shoff + i * e_shentsize;
114 if sh + e_shentsize > elf_data.len() {
115 break;
116 }
117 let name_off = u32::from_le_bytes(elf_data[sh..sh + 4].try_into().unwrap()) as usize;
118 let addr = u64::from_le_bytes(elf_data[sh + 16..sh + 24].try_into().unwrap());
119 let file_off = u64::from_le_bytes(elf_data[sh + 24..sh + 32].try_into().unwrap()) as usize;
120 let size = u64::from_le_bytes(elf_data[sh + 32..sh + 40].try_into().unwrap()) as usize;
121 let name = if name_off < strtab.len() {
122 let end = strtab[name_off..].iter().position(|&b| b == 0).unwrap_or(0);
123 std::str::from_utf8(&strtab[name_off..name_off + end]).unwrap_or("")
124 } else {
125 ""
126 };
127 if name == section_name && file_off + size <= elf_data.len() {
128 hits.push((addr, &elf_data[file_off..file_off + size]));
129 }
130 }
131 hits.sort_by_key(|&(addr, _)| addr);
132 Ok(hits.into_iter().map(|(_, bytes)| bytes).collect())
133}
134
135pub(crate) fn parse_linked_elf(data: &[u8]) -> Result<LinkedElf, TranspileError> {
137 if data.len() < 64 || data[0..4] != [0x7F, b'E', b'L', b'F'] {
138 return Err(TranspileError::ElfParse("not an ELF file".into()));
139 }
140
141 match data[4] {
142 2 => {}
143 1 => {
144 return Err(TranspileError::ElfParse(
145 "linker requires 64-bit ELF (rv64em)".into(),
146 ));
147 }
148 _ => return Err(TranspileError::ElfParse("unsupported ELF class".into())),
149 }
150
151 let e_shoff = u64::from_le_bytes(data[40..48].try_into().unwrap()) as usize;
153 let e_shentsize = u16::from_le_bytes(data[58..60].try_into().unwrap()) as usize;
154 let e_shnum = u16::from_le_bytes(data[60..62].try_into().unwrap()) as usize;
155 let e_shstrndx = u16::from_le_bytes(data[62..64].try_into().unwrap()) as usize;
156
157 let strtab = {
159 let sh = e_shoff + e_shstrndx * e_shentsize;
160 let off = u64::from_le_bytes(data[sh + 24..sh + 32].try_into().unwrap()) as usize;
161 let sz = u64::from_le_bytes(data[sh + 32..sh + 40].try_into().unwrap()) as usize;
162 &data[off..off + sz]
163 };
164
165 let get_name = |name_off: usize| -> &str {
166 if name_off >= strtab.len() {
167 return "";
168 }
169 let end = strtab[name_off..].iter().position(|&b| b == 0).unwrap_or(0);
170 std::str::from_utf8(&strtab[name_off..name_off + end]).unwrap_or("")
171 };
172
173 struct SectionInfo {
175 name_off: usize,
176 sh_type: u32,
177 flags: u64,
178 addr: u64,
179 file_off: usize,
180 size: usize,
181 link: usize,
182 _info: usize,
183 }
184
185 let mut sections = Vec::with_capacity(e_shnum);
186 for i in 0..e_shnum {
187 let sh = e_shoff + i * e_shentsize;
188 if sh + e_shentsize > data.len() {
189 break;
190 }
191 sections.push(SectionInfo {
192 name_off: u32::from_le_bytes(data[sh..sh + 4].try_into().unwrap()) as usize,
193 sh_type: u32::from_le_bytes(data[sh + 4..sh + 8].try_into().unwrap()),
194 flags: u64::from_le_bytes(data[sh + 8..sh + 16].try_into().unwrap()),
195 addr: u64::from_le_bytes(data[sh + 16..sh + 24].try_into().unwrap()),
196 file_off: u64::from_le_bytes(data[sh + 24..sh + 32].try_into().unwrap()) as usize,
197 size: u64::from_le_bytes(data[sh + 32..sh + 40].try_into().unwrap()) as usize,
198 link: u32::from_le_bytes(data[sh + 40..sh + 44].try_into().unwrap()) as usize,
199 _info: u32::from_le_bytes(data[sh + 44..sh + 48].try_into().unwrap()) as usize,
200 });
201 }
202
203 let mut code_sections = Vec::new();
205 let mut ro_sections: Vec<(u64, usize, Vec<u8>)> = Vec::new();
206 let mut rw_sections: Vec<(u64, usize, Option<Vec<u8>>)> = Vec::new();
207 let mut rela_section_indices = Vec::new();
208 let mut symtab_idx = None;
209
210 for (i, s) in sections.iter().enumerate() {
211 let name = get_name(s.name_off);
212 let is_alloc = s.flags & 2 != 0;
213 let is_exec = s.flags & 4 != 0;
214 let is_write = s.flags & 1 != 0;
215
216 if s.sh_type == 2 {
217 symtab_idx = Some(i);
219 }
220 if s.sh_type == 4 {
221 rela_section_indices.push(i);
223 }
224 if !is_alloc || s.sh_type == 0 {
225 continue;
226 }
227
228 if is_exec && s.file_off + s.size <= data.len() {
229 code_sections.push((
230 s.file_off as u64,
231 s.addr,
232 data[s.file_off..s.file_off + s.size].to_vec(),
233 ));
234 } else if !is_exec
235 && (name.starts_with(".rodata")
236 || name == ".srodata"
237 || name.starts_with(".data.rel.ro"))
238 {
239 if s.file_off + s.size <= data.len() {
240 ro_sections.push((
241 s.addr,
242 s.size,
243 data[s.file_off..s.file_off + s.size].to_vec(),
244 ));
245 }
246 } else if is_write {
247 if s.sh_type == 8 {
248 rw_sections.push((s.addr, s.size, None));
250 } else if s.file_off + s.size <= data.len() {
251 rw_sections.push((
252 s.addr,
253 s.size,
254 Some(data[s.file_off..s.file_off + s.size].to_vec()),
255 ));
256 }
257 }
258 }
259
260 let mut symbols_by_idx: Vec<(String, u64)> = Vec::new();
262 if let Some(si) = symtab_idx {
263 let s = §ions[si];
264 let sym_strtab = {
265 let ss = §ions[s.link];
266 &data[ss.file_off..ss.file_off + ss.size]
267 };
268 let count = s.size / 24;
269 for j in 0..count {
270 let off = s.file_off + j * 24;
271 if off + 24 > data.len() {
272 break;
273 }
274 let st_name = u32::from_le_bytes(data[off..off + 4].try_into().unwrap()) as usize;
275 let st_value = u64::from_le_bytes(data[off + 8..off + 16].try_into().unwrap());
276
277 let name = {
278 if st_name < sym_strtab.len() {
279 let end = sym_strtab[st_name..]
280 .iter()
281 .position(|&b| b == 0)
282 .unwrap_or(0);
283 std::str::from_utf8(&sym_strtab[st_name..st_name + end]).unwrap_or("")
284 } else {
285 ""
286 }
287 };
288
289 symbols_by_idx.push((name.to_string(), st_value));
290 }
291 }
292
293 let ro_min = ro_sections.iter().map(|(a, _, _)| *a).min().unwrap_or(0);
295 let ro_max = ro_sections
296 .iter()
297 .map(|(a, sz, _)| *a + *sz as u64)
298 .max()
299 .unwrap_or(0);
300
301 let page_size: u64 = 4096;
302 let stack_size = if ro_min > 0 {
303 (ro_min / page_size) * page_size
304 } else {
305 4 * page_size
306 };
307
308 let ro_blob_size = if ro_max > stack_size {
309 (ro_max - stack_size) as usize
310 } else {
311 0
312 };
313 let mut ro_data = vec![0u8; ro_blob_size];
314 for (addr, sz, d) in &ro_sections {
315 let off = (*addr - stack_size) as usize;
316 if off + sz <= ro_data.len() {
317 ro_data[off..off + sz].copy_from_slice(d);
318 }
319 }
320
321 let ro_pages = ro_data.len().div_ceil(page_size as usize);
322 let rw_pvm_base = stack_size + (ro_pages as u64 * page_size);
323 let mut rw_data = Vec::new();
324 let mut rw_base = rw_pvm_base;
327 if !rw_sections.is_empty() {
328 let rw_min = rw_sections.iter().map(|(a, _, _)| *a).min().unwrap();
329 let rw_max = rw_sections
330 .iter()
331 .map(|(a, sz, _)| *a + *sz as u64)
332 .max()
333 .unwrap();
334 rw_base = rw_pvm_base.min(rw_min);
335 let rw_blob_size = (rw_max - rw_base) as usize;
336 rw_data = vec![0u8; rw_blob_size];
337 for (addr, sz, d) in &rw_sections {
338 let off = (*addr - rw_base) as usize;
339 if let Some(d) = d
340 && off + sz <= rw_data.len()
341 {
342 rw_data[off..off + sz].copy_from_slice(d);
343 }
344 }
345 }
346
347 let mut hi20_targets: HashMap<u64, u64> = HashMap::new();
348 let mut lo12_targets: HashMap<u64, u64> = HashMap::new();
349 let mut lo12_to_hi20: HashMap<u64, u64> = HashMap::new();
350 let mut call_targets: HashMap<u64, u64> = HashMap::new();
351
352 let mut lo12_entries: Vec<(u64, u64)> = Vec::new();
353 let mut abs64_relocs: Vec<(u64, u64, u8)> = Vec::new();
354 let mut abs_data_relocs: Vec<(u64, u64, u8)> = Vec::new();
355 let mut sub32_relocs: Vec<(u64, u64)> = Vec::new();
356 let code_ranges: Vec<(u64, u64)> = code_sections
357 .iter()
358 .map(|(_, vaddr, data)| (*vaddr, *vaddr + data.len() as u64))
359 .collect();
360
361 for &ri in &rela_section_indices {
362 let rs = §ions[ri];
363 let count = rs.size / 24;
364 for j in 0..count {
365 let off = rs.file_off + j * 24;
366 if off + 24 > data.len() {
367 break;
368 }
369 let r_offset = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
370 let r_info = u64::from_le_bytes(data[off + 8..off + 16].try_into().unwrap());
371 let r_addend = i64::from_le_bytes(data[off + 16..off + 24].try_into().unwrap());
372 let r_type = (r_info & 0xFFFFFFFF) as u32;
373 let r_sym = (r_info >> 32) as usize;
374
375 let rtype = match RelocType::from_raw(r_type) {
376 Some(t) => t,
377 None => continue,
378 };
379
380 let sym_value = if r_sym < symbols_by_idx.len() {
381 symbols_by_idx[r_sym].1
382 } else {
383 0
384 };
385
386 let target_addr = (sym_value as i64 + r_addend) as u64;
387
388 match rtype {
389 RelocType::Abs32 => {
390 let is_code_ptr = code_ranges
391 .iter()
392 .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
393 if is_code_ptr {
394 abs64_relocs.push((r_offset, target_addr, 4));
395 } else {
396 abs_data_relocs.push((r_offset, target_addr, 4));
397 }
398 }
399 RelocType::Abs64 => {
400 let is_code_ptr = code_ranges
401 .iter()
402 .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
403 if is_code_ptr {
404 abs64_relocs.push((r_offset, target_addr, 8));
405 } else {
406 abs_data_relocs.push((r_offset, target_addr, 8));
407 }
408 }
409 RelocType::Add32 => {
410 let is_code_ptr = code_ranges
411 .iter()
412 .any(|(lo, hi)| target_addr >= *lo && target_addr < *hi);
413 if is_code_ptr {
414 abs64_relocs.push((r_offset, target_addr, 4));
415 }
416 }
417 RelocType::Sub32 => {
418 sub32_relocs.push((r_offset, target_addr));
419 }
420 RelocType::CallPlt => {
421 call_targets.insert(r_offset, target_addr);
422 }
423 RelocType::PcrelHi20 => {
424 hi20_targets.insert(r_offset, target_addr);
425 }
426 RelocType::PcrelLo12I | RelocType::PcrelLo12S => {
427 lo12_entries.push((r_offset, sym_value));
428 }
429 }
430 }
431 }
432
433 for (lo12_addr, hi20_addr) in lo12_entries {
434 if let Some(&data_addr) = hi20_targets.get(&hi20_addr) {
435 lo12_targets.insert(lo12_addr, data_addr);
436 lo12_to_hi20.insert(lo12_addr, hi20_addr);
437 }
438 }
439
440 let heap_pages = 16u32; Ok(LinkedElf {
443 code_sections,
444 ro_data,
445 rw_data,
446 stack_size: stack_size as u32,
447 heap_pages,
448 hi20_targets,
449 lo12_targets,
450 lo12_to_hi20,
451 call_targets,
452 abs_code_ptrs: abs64_relocs,
453 abs_data_ptrs: abs_data_relocs,
454 sub32_relocs,
455 rw_base,
456 code_ranges,
457 })
458}