Skip to main content

javm_transpiler/
program.rs

1//! JAR program blob format — capability manifest.
2//!
3//! The blob is a capability manifest: a list of initial capabilities
4//! (CODE and DATA) with their contents, plus invocation directives.
5//!
6//! Layout:
7//! ```text
8//! Header:
9//!   magic: u32              'JAR\x02'
10//!   memory_pages: u32       total Untyped budget
11//!   cap_count: u8           number of initial capabilities
12//!   init_cap: u8            cap_index of the **initialize CODE cap**
13//!                            run by Vault.initialize. The init program
14//!                            is responsible for placing a callable-shaped
15//!                            FrameRef at bare-Frame slot 4 before halting.
16//!
17//! Capabilities[cap_count]:
18//!   cap[i]: {
19//!     cap_index: u8         slot in VM's cap table
20//!     cap_type: u8          0 = CODE, 1 = DATA
21//!     page_count: u32       number of pages (DATA only)
22//!     data_offset: u32      offset into blob's data section
23//!     data_len: u32         bytes of initial data (0 = zero-filled)
24//!   }
25//!
26//! Data section:
27//!   (variable-length, referenced by capabilities)
28//! ```
29//!
30//! In the v3 model the kernel will eventually consume `Image.memory_mappings`
31//! to set up DATA-cap mappings declaratively at instance init. Until that
32//! lands, transpiled chain Images carry an empty mapping list; the SP
33//! value baked into `EndpointDef.initial_regs` makes the metadata
34//! correct for when mappings come online.
35
/// Access mode that [`ProgramLayout`](crate::layout::ProgramLayout) records for its
/// memory-mapping regions (stack / ro / rw / heap). Once persistent,
/// declarative mappings (`Image.memory_mappings`) exist, each mode is
/// meant to be translated into the corresponding
/// `MappingSource::Persistent(...)` entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Access {
    /// Read-only region.
    RO,
    /// Read-write region.
    RW,
}
45
/// JAR magic: the bytes `J`, `A`, `R`, `0x02`, read as a little-endian `u32`.
pub const JAR_MAGIC: u32 = u32::from_le_bytes(*b"JAR\x02");

/// Header size in bytes: magic(4), memory_pages(4), cap_count(1),
/// init_cap(1) — 10 in total.
const HEADER_SIZE: usize = 4 + 4 + 1 + 1;

/// Size in bytes of one capability entry: cap_index(1), cap_type(1),
/// page_count(4), data_offset(4), data_len(4) — 14 in total.
const CAP_ENTRY_SIZE: usize = 1 + 1 + 4 + 4 + 4;
55
/// Discriminator stored in a manifest entry's `cap_type` byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CapEntryType {
    /// CODE capability, encoded as `0`.
    Code = 0,
    /// DATA capability, encoded as `1`.
    Data = 1,
}
63
64/// A single capability entry in the manifest. DATA caps carry only
65/// `(cap_index, page_count, data_offset, data_len)`; v3 chain Images
66/// will eventually express their data regions via
67/// `Image.memory_mappings` directly.
68#[derive(Debug, Clone)]
69pub struct CapManifestEntry {
70    /// Slot in the VM's cap table.
71    pub cap_index: u8,
72    /// Capability type.
73    pub cap_type: CapEntryType,
74    /// Number of pages (DATA only, ignored for CODE).
75    pub page_count: u32,
76    /// Offset into the blob's data section (0 = no data).
77    pub data_offset: u32,
78    /// Bytes of initial data (0 = zero-filled for DATA, empty for CODE).
79    pub data_len: u32,
80}
81
/// Header fields of a parsed JAR blob (the magic is checked during
/// parsing and not stored).
#[derive(Debug, Clone)]
pub struct ProgramHeader {
    /// Total page budget for Untyped memory.
    pub memory_pages: u32,
    /// How many capability entries the manifest holds.
    pub cap_count: u8,
    /// Cap index of the **initialize CODE cap**, i.e. the program that
    /// `Vault.initialize` runs. That init program chooses what becomes
    /// the public Callable by placing it at bare-Frame slot 4 before
    /// halting.
    pub init_cap: u8,
}
94
95/// Parsed JAR blob.
96#[derive(Debug)]
97pub struct ParsedBlob<'a> {
98    /// Header fields.
99    pub header: ProgramHeader,
100    /// Capability manifest entries.
101    pub caps: Vec<CapManifestEntry>,
102    /// Data section (referenced by capabilities via data_offset + data_len).
103    pub data_section: &'a [u8],
104}
105
/// Read the byte at `*offset` and move the cursor one byte forward.
///
/// Returns `None`, leaving `*offset` untouched, when the cursor is at or
/// past the end of `blob`.
fn read_u8(blob: &[u8], offset: &mut usize) -> Option<u8> {
    let byte = *blob.get(*offset)?;
    *offset += 1;
    Some(byte)
}
114
/// Read a little-endian `u32` starting at `*offset` and move the cursor
/// four bytes forward.
///
/// Returns `None`, leaving `*offset` untouched, when fewer than four
/// bytes remain in `blob`. The end position is computed with checked
/// arithmetic, so an out-of-range cursor (even one close to
/// `usize::MAX`) yields `None` instead of overflowing.
fn read_u32_le(blob: &[u8], offset: &mut usize) -> Option<u32> {
    let end = (*offset).checked_add(4)?;
    // `get` rejects any range that runs past the end of the slice.
    let value = match *blob.get(*offset..end)? {
        [b0, b1, b2, b3] => u32::from_le_bytes([b0, b1, b2, b3]),
        _ => return None,
    };
    *offset = end;
    Some(value)
}
128
129/// Parse a JAR program blob.
130pub fn parse_blob(blob: &[u8]) -> Option<ParsedBlob<'_>> {
131    if blob.len() < HEADER_SIZE {
132        return None;
133    }
134
135    let mut offset = 0;
136
137    // Header
138    let magic = read_u32_le(blob, &mut offset)?;
139    if magic != JAR_MAGIC {
140        return None;
141    }
142    let memory_pages = read_u32_le(blob, &mut offset)?;
143    let cap_count = read_u8(blob, &mut offset)?;
144    let init_cap = read_u8(blob, &mut offset)?;
145
146    // Capability entries
147    let entries_size = cap_count as usize * CAP_ENTRY_SIZE;
148    if offset + entries_size > blob.len() {
149        return None;
150    }
151
152    let mut caps = Vec::with_capacity(cap_count as usize);
153    for _ in 0..cap_count {
154        let cap_index = read_u8(blob, &mut offset)?;
155        let cap_type_raw = read_u8(blob, &mut offset)?;
156        let cap_type = match cap_type_raw {
157            0 => CapEntryType::Code,
158            1 => CapEntryType::Data,
159            _ => return None,
160        };
161        let page_count = read_u32_le(blob, &mut offset)?;
162        let data_offset = read_u32_le(blob, &mut offset)?;
163        let data_len = read_u32_le(blob, &mut offset)?;
164
165        caps.push(CapManifestEntry {
166            cap_index,
167            cap_type,
168            page_count,
169            data_offset,
170            data_len,
171        });
172    }
173
174    // Data section = everything after the cap entries
175    let data_section = &blob[offset..];
176
177    // Validate data references
178    for cap in &caps {
179        if cap.data_len > 0 {
180            let end = cap.data_offset as usize + cap.data_len as usize;
181            if end > data_section.len() {
182                return None;
183            }
184        }
185    }
186
187    Some(ParsedBlob {
188        header: ProgramHeader {
189            memory_pages,
190            cap_count,
191            init_cap,
192        },
193        caps,
194        data_section,
195    })
196}
197
/// Decoded contents of a code sub-blob (the data belonging to a CODE cap).
#[derive(Debug)]
pub struct ParsedCodeBlob {
    /// Jump-table entries, each widened to `u32`.
    pub jump_table: Vec<u32>,
    /// Raw code bytes.
    pub code: Vec<u8>,
    /// Unpacked bitmask: one byte per code position.
    pub bitmask: Vec<u8>,
}
205
206/// Parse a CODE cap's data section into jump table, code, and bitmask.
207/// Format: jump_len(4) + entry_size(1) + code_len(4) + jump_entries + code + packed_bitmask
208pub fn parse_code_blob(data: &[u8]) -> Option<ParsedCodeBlob> {
209    if data.len() < 9 {
210        return None;
211    }
212    let mut offset = 0;
213    let jump_len = read_u32_le(data, &mut offset)? as usize;
214    let entry_size = read_u8(data, &mut offset)? as usize;
215    let code_len = read_u32_le(data, &mut offset)? as usize;
216
217    if entry_size == 0 || entry_size > 4 {
218        return None;
219    }
220
221    // Read jump table
222    let jt_bytes = jump_len * entry_size;
223    if offset + jt_bytes > data.len() {
224        return None;
225    }
226    let mut jump_table = Vec::with_capacity(jump_len);
227    for _ in 0..jump_len {
228        let mut val: u32 = 0;
229        for i in 0..entry_size {
230            val |= (data[offset + i] as u32) << (i * 8);
231        }
232        jump_table.push(val);
233        offset += entry_size;
234    }
235
236    // Read code
237    if offset + code_len > data.len() {
238        return None;
239    }
240    let code = data[offset..offset + code_len].to_vec();
241    offset += code_len;
242
243    // Read packed bitmask
244    let bitmask_bytes = code_len.div_ceil(8);
245    if offset + bitmask_bytes > data.len() {
246        return None;
247    }
248    let bitmask = unpack_bitmask(&data[offset..offset + bitmask_bytes], code_len);
249
250    Some(ParsedCodeBlob {
251        jump_table,
252        code,
253        bitmask,
254    })
255}
256
/// Expand a packed bitmask (one bit per code position, least-significant
/// bit first within each byte) into one `0`/`1` byte per code position.
///
/// Indexes `packed` directly, so it panics if `packed` holds fewer than
/// `ceil(code_len / 8)` bytes.
fn unpack_bitmask(packed: &[u8], code_len: usize) -> Vec<u8> {
    (0..code_len)
        .map(|pos| (packed[pos / 8] >> (pos % 8)) & 1)
        .collect()
}
265
266/// Build a minimal JAR blob with a single CODE cap from raw components.
267/// Useful for tests — no DATA caps, small memory budget.
268pub fn build_simple_blob(code: &[u8], bitmask: &[u8], jump_table: &[u32]) -> Vec<u8> {
269    // Build code sub-blob: jump_len(4) + entry_size(1) + code_len(4) + jt + code + packed_bitmask
270    let entry_size = if jump_table.is_empty() { 1u8 } else { 4u8 };
271    let mut code_data = Vec::new();
272    code_data.extend_from_slice(&(jump_table.len() as u32).to_le_bytes());
273    code_data.push(entry_size);
274    code_data.extend_from_slice(&(code.len() as u32).to_le_bytes());
275    for &jt_entry in jump_table {
276        code_data.extend_from_slice(&jt_entry.to_le_bytes()[..entry_size as usize]);
277    }
278    code_data.extend_from_slice(code);
279    // Pack bitmask
280    let packed_len = code.len().div_ceil(8);
281    let mut packed = vec![0u8; packed_len];
282    for (i, &b) in bitmask.iter().enumerate() {
283        if b != 0 {
284            packed[i / 8] |= 1 << (i % 8);
285        }
286    }
287    code_data.extend_from_slice(&packed);
288
289    let caps = vec![CapManifestEntry {
290        cap_index: 64,
291        cap_type: CapEntryType::Code,
292        page_count: 0,
293        data_offset: 0,
294        data_len: code_data.len() as u32,
295    }];
296    build_blob(4, 64, &caps, &code_data)
297}
298
299/// Build a JAR blob from components.
300pub fn build_blob(
301    memory_pages: u32,
302    init_cap: u8,
303    caps: &[CapManifestEntry],
304    data_section: &[u8],
305) -> Vec<u8> {
306    let cap_count = caps.len() as u8;
307    let total_size = HEADER_SIZE + caps.len() * CAP_ENTRY_SIZE + data_section.len();
308    let mut blob = vec![0u8; total_size];
309    let mut offset = 0;
310
311    // Header (10 bytes: magic + memory_pages + cap_count + init_cap)
312    write_u32_le(&mut blob, &mut offset, JAR_MAGIC);
313    write_u32_le(&mut blob, &mut offset, memory_pages);
314    write_u8(&mut blob, &mut offset, cap_count);
315    write_u8(&mut blob, &mut offset, init_cap);
316
317    // Cap entries
318    for cap in caps {
319        write_u8(&mut blob, &mut offset, cap.cap_index);
320        write_u8(&mut blob, &mut offset, cap.cap_type as u8);
321        write_u32_le(&mut blob, &mut offset, cap.page_count);
322        write_u32_le(&mut blob, &mut offset, cap.data_offset);
323        write_u32_le(&mut blob, &mut offset, cap.data_len);
324    }
325
326    // Data section
327    blob[offset..].copy_from_slice(data_section);
328
329    blob
330}
331
/// Store `v` at `buf[*offset]` and move the cursor one byte forward.
/// Panics if `*offset` is outside `buf`.
fn write_u8(buf: &mut [u8], offset: &mut usize, v: u8) {
    let at = *offset;
    buf[at] = v;
    *offset = at + 1;
}
336
/// Store `v` as four little-endian bytes starting at `buf[*offset]` and
/// move the cursor four bytes forward. Panics if those four bytes do not
/// fit inside `buf`.
fn write_u32_le(buf: &mut [u8], offset: &mut usize, v: u32) {
    let start = *offset;
    let end = start + 4;
    buf[start..end].copy_from_slice(&v.to_le_bytes());
    *offset = end;
}
341
342/// Get the data slice for a capability entry from the data section.
343pub fn cap_data<'a>(entry: &CapManifestEntry, data_section: &'a [u8]) -> &'a [u8] {
344    if entry.data_len == 0 {
345        return &[];
346    }
347    &data_section[entry.data_offset as usize..entry.data_offset as usize + entry.data_len as usize]
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Four bytes standing in for a CODE cap's contents.
    const CODE_BYTES: [u8; 4] = [0x00, 0x01, 0x02, 0x03];
    /// Initial contents of the read-only DATA cap.
    const RO_BYTES: [u8; 8] = [0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE];

    /// Data section laid out as `CODE_BYTES` followed by `RO_BYTES`.
    fn make_test_data_section() -> Vec<u8> {
        [&CODE_BYTES[..], &RO_BYTES[..]].concat()
    }

    /// Shorthand for building a manifest entry.
    fn cap(
        cap_index: u8,
        cap_type: CapEntryType,
        page_count: u32,
        data_offset: u32,
        data_len: u32,
    ) -> CapManifestEntry {
        CapManifestEntry {
            cap_index,
            cap_type,
            page_count,
            data_offset,
            data_len,
        }
    }

    /// Assemble a code sub-blob header followed by `rest`.
    fn code_sub_blob(jump_len: u32, entry_size: u8, code_len: u32, rest: &[u8]) -> Vec<u8> {
        let mut out = Vec::with_capacity(9 + rest.len());
        out.extend_from_slice(&jump_len.to_le_bytes());
        out.push(entry_size);
        out.extend_from_slice(&code_len.to_le_bytes());
        out.extend_from_slice(rest);
        out
    }

    #[test]
    fn test_roundtrip() {
        let data_section = make_test_data_section();

        let caps = vec![
            cap(64, CapEntryType::Code, 0, 0, 4), // code blob
            cap(65, CapEntryType::Data, 1, 0, 0), // zero-filled stack
            cap(66, CapEntryType::Data, 1, 4, 8), // ro_data
        ];

        let blob = build_blob(10, 64, &caps, &data_section);
        let parsed = parse_blob(&blob).expect("parse failed");

        assert_eq!(parsed.header.memory_pages, 10);
        assert_eq!(parsed.header.cap_count, 3);
        assert_eq!(parsed.header.init_cap, 64);
        assert_eq!(parsed.caps.len(), 3);

        // CODE cap
        assert_eq!(parsed.caps[0].cap_index, 64);
        assert_eq!(parsed.caps[0].cap_type, CapEntryType::Code);
        assert_eq!(parsed.caps[0].data_len, 4);
        assert_eq!(cap_data(&parsed.caps[0], parsed.data_section), &CODE_BYTES[..]);

        // Stack DATA cap (zero-filled)
        assert_eq!(parsed.caps[1].cap_index, 65);
        assert_eq!(parsed.caps[1].cap_type, CapEntryType::Data);
        assert_eq!(parsed.caps[1].page_count, 1);
        assert_eq!(parsed.caps[1].data_len, 0);
        assert!(cap_data(&parsed.caps[1], parsed.data_section).is_empty());

        // RO DATA cap
        assert_eq!(parsed.caps[2].cap_index, 66);
        assert_eq!(parsed.caps[2].cap_type, CapEntryType::Data);
        assert_eq!(parsed.caps[2].page_count, 1);
        assert_eq!(cap_data(&parsed.caps[2], parsed.data_section), &RO_BYTES[..]);
    }

    #[test]
    fn test_bad_magic() {
        let mut bad = build_blob(10, 64, &[], &[]);
        bad[3] = 0x99; // corrupt version byte
        assert!(parse_blob(&bad).is_none());
    }

    #[test]
    fn test_truncated_blob() {
        // Too short for header
        assert!(parse_blob(&[0; 5]).is_none());

        // Header says 1 cap but no cap entries follow
        let mut bad = build_blob(10, 64, &[], &[]);
        bad[8] = 1;
        assert!(parse_blob(&bad).is_none());
    }

    #[test]
    fn test_unknown_cap_type() {
        let mut bad = build_blob(10, 64, &[cap(64, CapEntryType::Code, 0, 0, 0)], &[]);
        // The first entry starts right after the header; its second byte
        // is `cap_type`.
        bad[HEADER_SIZE + 1] = 2;
        assert!(parse_blob(&bad).is_none());
    }

    #[test]
    fn test_bad_data_reference() {
        // References 100 bytes but the data section is empty.
        let caps = [cap(64, CapEntryType::Code, 0, 0, 100)];
        let blob = build_blob(10, 64, &caps, &[]);
        assert!(parse_blob(&blob).is_none());
    }

    #[test]
    fn test_empty_manifest() {
        let blob = build_blob(0, 0, &[], &[]);
        let parsed = parse_blob(&blob).unwrap();
        assert_eq!(parsed.caps.len(), 0);
        assert_eq!(parsed.data_section.len(), 0);
    }

    #[test]
    fn test_code_sub_blob_with_jump_table() {
        // jump_len=2, entry_size=4, code_len=2, then:
        //   jump table: 2 entries × 4 bytes = [0, 1]
        //   code bytes: [0, 1]
        //   packed bitmask: 1 byte for 2 bits = 0b11
        let code_data = code_sub_blob(2, 4, 2, &[0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0x03]);

        let blob = parse_code_blob(&code_data).expect("code sub-blob should parse");
        assert_eq!(blob.code, vec![0, 1]);
        assert_eq!(blob.bitmask, vec![1, 1]);
        assert_eq!(blob.jump_table, vec![0, 1]);
    }

    #[test]
    fn test_code_sub_blob_bad_entry_size() {
        assert!(parse_code_blob(&code_sub_blob(0, 0, 0, &[])).is_none());
        assert!(parse_code_blob(&code_sub_blob(0, 5, 0, &[])).is_none());
    }

    #[test]
    fn test_code_sub_blob_truncated() {
        // Shorter than the 9-byte header.
        assert!(parse_code_blob(&[0; 8]).is_none());
        // Jump table cut short: 2 × 4 bytes promised, 4 present.
        assert!(parse_code_blob(&code_sub_blob(2, 4, 0, &[0, 0, 0, 0])).is_none());
        // Code cut short: 4 bytes promised, 2 present.
        assert!(parse_code_blob(&code_sub_blob(0, 1, 4, &[1, 2])).is_none());
        // Code present but the bitmask byte is missing.
        assert!(parse_code_blob(&code_sub_blob(0, 1, 1, &[0xAA])).is_none());
    }

    #[test]
    fn test_build_simple_blob_roundtrip() {
        let blob = build_simple_blob(&[0, 1, 0], &[1, 1, 1], &[]);
        let parsed = parse_blob(&blob).expect("should parse");
        assert_eq!(parsed.caps.len(), 1); // 1 CODE cap
        let code_cap = &parsed.caps[0];
        assert_eq!(code_cap.cap_type, CapEntryType::Code);
        let code_blob = parse_code_blob(cap_data(code_cap, parsed.data_section)).unwrap();
        assert_eq!(code_blob.code, vec![0, 1, 0]);
        assert_eq!(code_blob.bitmask, vec![1, 1, 1]);
        assert!(code_blob.jump_table.is_empty());
    }
}