Skip to main content

nub_host_common/
layout.rs

1/*
2Copyright 2025  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15 */
16
17#[cfg_attr(target_arch = "x86", path = "arch/i686/layout.rs")]
18#[cfg_attr(
19    all(target_arch = "x86_64", not(feature = "i686-guest")),
20    path = "arch/amd64/layout.rs"
21)]
22#[cfg_attr(
23    all(target_arch = "x86_64", feature = "i686-guest"),
24    path = "arch/i686/layout.rs"
25)]
26#[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/layout.rs")]
27mod arch;
28
29pub use arch::{MAX_GPA, MAX_GVA};
30#[cfg(any(
31    all(target_arch = "x86_64", not(feature = "i686-guest")),
32    target_arch = "aarch64"
33))]
34pub use arch::{SNAPSHOT_PT_GVA_MAX, SNAPSHOT_PT_GVA_MIN};
35
/// Default virtual address at which the guest's entire memory range
/// is anchored.
///
/// The host (when it mmaps the snapshot/scratch regions) and the guest
/// (through its page table) both use this address as the start of
/// guest memory. It can be overridden with the `JAR_GUEST_VA_BASE`
/// environment variable (a hex string, with or without a `0x`
/// prefix). The default sits in the mid-range band of the x86_64 user
/// VA space, which is practically never touched by anything else.
pub const GUEST_VA_BASE_DEFAULT: u64 = 0x0000_5000_0000_0000;

/// Size in bytes of the VA range reserved for the guest: 17 GiB
/// (`0x4_4000_0000`).
///
/// Layout, as offsets from the base of the reservation:
///
/// * `[0, 4 GiB)`: javm program
/// * `[4 GiB, 5 GiB)`: JIT scratch
/// * `[5 GiB, 7 GiB)`: kernel (starts at [`KERNEL_OFFSET`])
/// * `[7 GiB, end)`: scratch
pub const GUEST_VA_SIZE: u64 = 17 << 30;

/// Offset from the base of the reservation at which the kernel binary
/// is loaded: 5 GiB (`0x1_4000_0000`).
pub const KERNEL_OFFSET: u64 = 5 << 30;
49
/// Base VA picked at reservation time, for platforms where
/// [`guest_va_base()`] is determined dynamically instead of coming
/// from configuration (today: macOS, which lacks
/// `MAP_FIXED_NOREPLACE`).
///
/// This static is only compiled on macOS. While it is still unset,
/// [`guest_va_base()`] resolves to the env override / default, which
/// is also what every other platform always does.
#[cfg(all(target_os = "macos", feature = "std"))]
static MACOS_RESERVED_BASE: std::sync::OnceLock<u64> = std::sync::OnceLock::new();
56
/// Parses a `JAR_GUEST_VA_BASE` value: a hexadecimal `u64`, optionally
/// preceded by a single `0x` or `0X` prefix. Surrounding whitespace is
/// ignored.
///
/// Exactly one prefix is removed, so malformed input such as `0x0x10`
/// is rejected instead of being silently read as `0x10`.
#[cfg(feature = "std")]
fn parse_guest_va_base(raw: &str) -> Result<u64, std::num::ParseIntError> {
    let trimmed = raw.trim();
    let digits = trimmed
        .strip_prefix("0x")
        .or_else(|| trimmed.strip_prefix("0X"))
        .unwrap_or(trimmed);
    u64::from_str_radix(digits, 16)
}

/// Returns the base VA at which the guest's memory range is anchored.
///
/// Resolution order:
///
/// 1. On macOS only: the base stored in `MACOS_RESERVED_BASE`, if one
///    has been stored.
/// 2. The `JAR_GUEST_VA_BASE` environment variable, parsed as hex
///    (with or without a `0x`/`0X` prefix).
/// 3. [`GUEST_VA_BASE_DEFAULT`], when the variable is unset or cannot
///    be read as Unicode.
///
/// # Panics
///
/// Panics if `JAR_GUEST_VA_BASE` is set but is not a valid hexadecimal
/// `u64`. The message includes the offending value.
#[cfg(feature = "std")]
pub fn guest_va_base() -> u64 {
    #[cfg(target_os = "macos")]
    if let Some(&base) = MACOS_RESERVED_BASE.get() {
        return base;
    }
    match std::env::var("JAR_GUEST_VA_BASE") {
        Ok(raw) => parse_guest_va_base(&raw).unwrap_or_else(|err| {
            panic!("JAR_GUEST_VA_BASE must be hex (got {:?}): {}", raw, err)
        }),
        // Unset or non-Unicode: fall back to the built-in default.
        Err(_) => GUEST_VA_BASE_DEFAULT,
    }
}
70
/// Reserves, once per process, the VA range
/// [`guest_va_base()`, `guest_va_base() + GUEST_VA_SIZE`).
///
/// Intended to run at host startup so that later mmaps of
/// guest-visible regions (snapshot, scratch, kernel shadow) can be
/// placed at known fixed VAs with `MAP_FIXED` inside the reservation.
///
/// The platform-specific work is done by
/// `reserve_guest_va_range_inner`:
///
/// * Linux claims the configured base atomically with
///   `MAP_FIXED_NOREPLACE`. A failure means something already occupies
///   the range, which is almost certainly a misconfiguration, so it is
///   reported as an error.
/// * macOS has no `MAP_FIXED_NOREPLACE`, so the kernel chooses the
///   base through a plain `mmap`. Placements below 5 GiB are rejected
///   and retried, up to 10 attempts; the accepted base is stored so
///   that [`guest_va_base()`] returns it.
///
/// # Errors
///
/// Returns an [`std::io::Error`] carrying the failure message. The
/// outcome of the first call is cached, so later calls return the same
/// result without attempting the reservation again.
#[cfg(feature = "std")]
pub fn reserve_guest_va_range() -> Result<(), std::io::Error> {
    static RESERVED: std::sync::OnceLock<Result<(), String>> = std::sync::OnceLock::new();
    match RESERVED.get_or_init(reserve_guest_va_range_inner) {
        Ok(()) => Ok(()),
        Err(message) => Err(std::io::Error::other(message.clone())),
    }
}
93
/// Linux implementation of the guest VA reservation.
///
/// Maps `GUEST_VA_SIZE` bytes of inaccessible (`PROT_NONE`), private,
/// anonymous, `MAP_NORESERVE` memory at exactly [`guest_va_base()`]
/// using `MAP_FIXED_NOREPLACE`.
///
/// # Errors
///
/// Returns a descriptive message when:
///
/// * the base or the size cannot be represented as a `usize` on this
///   host (e.g. a 32-bit build),
/// * `mmap` fails, or
/// * the kernel placed the mapping somewhere other than the requested
///   base (in which case the stray mapping is unmapped first).
#[cfg(all(feature = "std", target_os = "linux"))]
fn reserve_guest_va_range_inner() -> Result<(), String> {
    let base = guest_va_base();
    // Checked conversions: a plain `as` cast would silently truncate on
    // a host whose pointers are narrower than 64 bits, and could end up
    // "successfully" reserving a range that is too small.
    let size = <usize as std::convert::TryFrom<u64>>::try_from(GUEST_VA_SIZE).map_err(|_| {
        format!(
            "JAR guest VA reservation: size {:#x} does not fit in this host's address space",
            GUEST_VA_SIZE
        )
    })?;
    let addr = <usize as std::convert::TryFrom<u64>>::try_from(base).map_err(|_| {
        format!(
            "JAR guest VA reservation: base {:#x} does not fit in this host's address space",
            base
        )
    })?;
    // SAFETY: mmap is a kernel call; we check the result before use.
    let ptr = unsafe {
        libc::mmap(
            addr as *mut libc::c_void,
            size,
            libc::PROT_NONE,
            libc::MAP_PRIVATE
                | libc::MAP_ANONYMOUS
                | libc::MAP_FIXED_NOREPLACE
                | libc::MAP_NORESERVE,
            -1,
            0,
        )
    };
    if ptr == libc::MAP_FAILED {
        return Err(format!(
            "JAR guest VA reservation failed: mmap({:#x}, {} bytes, MAP_FIXED_NOREPLACE): {}",
            base,
            size,
            std::io::Error::last_os_error()
        ));
    }
    if ptr as usize != addr {
        // Kernels that predate MAP_FIXED_NOREPLACE do not recognise the
        // flag and treat the address as a mere hint, so on a collision
        // they place the mapping elsewhere. Unmap and bail: something
        // is squatting on our VA range.
        // SAFETY: ptr came from a successful mmap of `size` bytes.
        unsafe {
            libc::munmap(ptr, size);
        }
        return Err(format!(
            "JAR guest VA reservation: requested {:#x}, kernel returned {:#x} — \
             something is squatting on our range",
            base, ptr as u64
        ));
    }
    Ok(())
}
136
/// macOS implementation of the guest VA reservation.
///
/// macOS has no `MAP_FIXED_NOREPLACE`, so the kernel picks the
/// address: we `mmap` `GUEST_VA_SIZE` bytes of `PROT_NONE` anonymous
/// memory with a null hint and accept the result if it lies at or
/// above 5 GiB. The accepted base is stored in `MACOS_RESERVED_BASE`.
///
/// Rejected (too low) mappings are kept alive until the loop is over
/// and only then released. If each one were unmapped before the next
/// attempt, the kernel would be free to hand back the very same range
/// every time, making the retries pointless.
///
/// # Errors
///
/// Returns a message if `mmap` fails or if no acceptable placement is
/// found within 10 attempts.
///
/// # Panics
///
/// Panics if `MACOS_RESERVED_BASE` has already been set.
#[cfg(all(feature = "std", target_os = "macos"))]
fn reserve_guest_va_range_inner() -> Result<(), String> {
    // 5 GiB — comfortably above the low region where the loader,
    // heap, and per-process stacks tend to cluster. If macOS hands
    // us anything below this we retry.
    const MIN_BASE: u64 = 0x1_4000_0000;
    const MAX_ATTEMPTS: usize = 10;
    let size = GUEST_VA_SIZE as usize;

    // Mappings that landed too low; held so that the next attempt
    // cannot be given the same range again.
    let mut rejected: Vec<*mut libc::c_void> = Vec::with_capacity(MAX_ATTEMPTS);
    let mut outcome: Result<(), String> = Err(
        "macOS: could not reserve guest VA range outside low 5 GiB after 10 retries".into(),
    );

    for _ in 0..MAX_ATTEMPTS {
        // SAFETY: plain mmap with a null hint; result checked below.
        let ptr = unsafe {
            libc::mmap(
                core::ptr::null_mut(),
                size,
                libc::PROT_NONE,
                libc::MAP_PRIVATE | libc::MAP_ANON,
                -1,
                0,
            )
        };
        if ptr == libc::MAP_FAILED {
            // Capture errno right away, before any other libc call.
            outcome = Err(format!(
                "JAR guest VA reservation failed: mmap({} bytes): {}",
                size,
                std::io::Error::last_os_error()
            ));
            break;
        }
        if (ptr as u64) >= MIN_BASE {
            MACOS_RESERVED_BASE
                .set(ptr as u64)
                .expect("MACOS_RESERVED_BASE set once");
            outcome = Ok(());
            break;
        }
        rejected.push(ptr);
    }

    for ptr in rejected {
        // SAFETY: every pointer in `rejected` came from a successful
        // mmap of exactly `size` bytes and has not been unmapped yet.
        unsafe {
            libc::munmap(ptr, size);
        }
    }
    outcome
}
176
/// Fallback for hosts that are neither Linux nor macOS: no reservation
/// is attempted and an error is always returned.
#[cfg(all(feature = "std", not(any(target_os = "linux", target_os = "macos"))))]
fn reserve_guest_va_range_inner() -> Result<(), String> {
    const UNSUPPORTED: &str =
        "JAR guest VA reservation: unsupported host OS (only linux and macos are supported)";
    Err(UNSUPPORTED.to_owned())
}
181
// Offsets, counted downward from the top of scratch memory, of the
// individual items kept there. No constant in this list uses 0x28.
pub const SCRATCH_TOP_SIZE_OFFSET: u64 = 0x0008;
pub const SCRATCH_TOP_ALLOCATOR_OFFSET: u64 = 0x0010;
pub const SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET: u64 = 0x0018;
pub const SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET: u64 = 0x0020;
pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x0030;
188
/// Offset, counted downward from the top of scratch memory, of a `u64`
/// counter shared between host and guest.
///
/// The value is `0x1008` (one 4 KiB page plus 8) rather than the next
/// sequential slot `0x28`. That puts the counter in scratch page
/// `0xffffe000` instead of the very last page `0xfffff000`; on i686
/// guests the last page would need frame `0xfffff`, which exceeds the
/// maximum representable frame number.
#[cfg(feature = "guest-counter")]
pub const SCRATCH_TOP_GUEST_COUNTER_OFFSET: u64 = 0x1000 + 0x08;
197
198pub fn scratch_base_gpa(size: usize) -> u64 {
199    (MAX_GPA - size + 1) as u64
200}
201pub fn scratch_base_gva(size: usize) -> u64 {
202    (MAX_GVA - size + 1) as u64
203}
204
205/// Compute the minimum scratch region size needed for a sandbox.
206pub use arch::min_scratch_size;