nub_host_kvm/sandbox/initialized_multi_use.rs
1/*
2Copyright 2025 The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17use std::sync::atomic::Ordering;
18use std::sync::{Arc, Mutex};
19
20use javm_cap::cap::Cap;
21use javm_cap::wire::WireCap;
22use nub_arch_x86_abi::{
23 BootInfo, CapHash as AbiCapHash, FN_ID_NUB_GET_BOOT_INFO, FN_ID_NUB_PUT_CAP,
24};
25use nub_host_common::rpc::{ArchivedResponse, Request};
26use rkyv::util::AlignedVec;
27use tracing::{Span, instrument};
28
29use super::host_funcs::FunctionRegistry;
30use crate::HyperlightError;
31use crate::Result;
32use crate::guest_cache_reader::GuestCacheReader;
33use crate::hypervisor::InterruptHandle;
34use crate::hypervisor::hyperlight_vm::HyperlightVm;
35use crate::mem::mgr::SandboxMemoryManager;
36use crate::mem::shared_mem::HostSharedMemory;
37use crate::metrics::{
38 METRIC_GUEST_ERROR, METRIC_GUEST_ERROR_LABEL_CODE, maybe_time_and_emit_guest_call,
39};
40
41/// A fully initialized sandbox that can execute guest functions multiple times.
42///
43/// Guest functions can be called repeatedly while maintaining state between calls.
44///
45/// Post-Stage-F: the upstream `snapshot()` / `restore()` / `map_file_cow()`
46/// rollback machinery is gone along with the CoW PT marking that backed it.
47/// If a guest call fails for any reason, drop the sandbox and build a new
48/// one — that's the only recovery path now (and the one `nub` already used).
49pub struct MultiUseSandbox {
50 /// Unique identifier for this sandbox instance
51 id: u64,
52 pub(crate) host_funcs: Arc<Mutex<FunctionRegistry>>,
53 pub(crate) mem_mgr: SandboxMemoryManager<HostSharedMemory>,
54 vm: HyperlightVm,
55 /// Lazily-initialised host-side view of the guest's heap-resident
56 /// `CacheDirectory`. Built on the first `put_cap_with_hash` call
57 /// (triggers `nub_get_boot_info` once to read the directory VA),
58 /// then reused. Lets the host short-circuit idempotent re-puts
59 /// without a roundtrip + merkle walk through the guest.
60 guest_cache_reader: Option<GuestCacheReader>,
61 #[cfg(gdb)]
62 dbg_mem_access_fn: Arc<Mutex<SandboxMemoryManager<HostSharedMemory>>>,
63}
64
65impl MultiUseSandbox {
66 /// Move an `UninitializedSandbox` into a new `MultiUseSandbox` instance.
67 ///
68 /// This function is not equivalent to doing an `evolve` from uninitialized
69 /// to initialized, and is purposely not exposed publicly outside the crate
70 /// (as a `From` implementation would be)
71 #[instrument(skip_all, parent = Span::current(), level = "Trace")]
72 pub(super) fn from_uninit(
73 host_funcs: Arc<Mutex<FunctionRegistry>>,
74 mgr: SandboxMemoryManager<HostSharedMemory>,
75 vm: HyperlightVm,
76 #[cfg(gdb)] dbg_mem_access_fn: Arc<Mutex<SandboxMemoryManager<HostSharedMemory>>>,
77 ) -> MultiUseSandbox {
78 Self {
79 id: super::snapshot::SANDBOX_CONFIGURATION_COUNTER.fetch_add(1, Ordering::Relaxed),
80 host_funcs,
81 mem_mgr: mgr,
82 vm,
83 guest_cache_reader: None,
84 #[cfg(gdb)]
85 dbg_mem_access_fn,
86 }
87 }
88
89 /// Returns this sandbox's unique id.
90 pub fn id(&self) -> u64 {
91 self.id
92 }
93
94 /// Call a guest function by `fn_id` with a raw byte payload.
95 /// Returns the response payload bytes on success.
96 ///
97 /// Wire format: the host serialises a
98 /// [`nub_host_common::rpc::Request`] (rkyv) carrying `fn_id` and
99 /// `payload`, ships it via the input data ring, the guest decodes
100 /// + dispatches + writes a `Response` to the output ring, and we
101 /// read + check `status` before returning the inner payload.
102 ///
103 /// Changes made to the sandbox during execution are persisted.
104 /// On failure the sandbox should be dropped and rebuilt.
105 #[instrument(err(Debug), skip(self, payload), parent = Span::current())]
106 pub fn call_raw(&mut self, fn_id: u32, payload: &[u8]) -> Result<Vec<u8>> {
107 maybe_time_and_emit_guest_call("call_raw", || {
108 self.call_guest_function_by_id(fn_id, payload)
109 })
110 }
111
112 fn call_guest_function_by_id(&mut self, fn_id: u32, payload: &[u8]) -> Result<Vec<u8>> {
113 // ===== KILL() TIMING POINT 1 =====
114 // Clear any stale cancellation from a previous guest function call or if kill() was called too early.
115 // Any kill() that completed (even partially) BEFORE this line has NO effect on this call.
116 self.vm.clear_cancel();
117
118 let res = (|| {
119 let req = Request {
120 fn_id,
121 payload: payload.to_vec(),
122 };
123 let req_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&req)
124 .map_err(|e| crate::new_error!("rkyv-serialize Request: {e}"))?;
125
126 self.mem_mgr
127 .write_guest_function_call_raw(req_bytes.as_slice())?;
128
129 let dispatch_res = self.vm.dispatch_call_from_host(
130 &mut self.mem_mgr,
131 &self.host_funcs,
132 #[cfg(gdb)]
133 self.dbg_mem_access_fn.clone(),
134 );
135
136 if let Err(e) = dispatch_res {
137 let (error, _should_poison) = e.promote();
138 return Err(error);
139 }
140
141 let raw_resp = self.mem_mgr.read_guest_function_call_result_raw()?;
142
143 let mut aligned = AlignedVec::<16>::with_capacity(raw_resp.len());
144 aligned.extend_from_slice(&raw_resp);
145
146 let resp = rkyv::access::<ArchivedResponse, rkyv::rancor::Error>(aligned.as_slice())
147 .map_err(|e| crate::new_error!("rkyv-access Response: {e}"))?;
148
149 let status = resp.status.to_native();
150 if status != 0 {
151 let msg = resp
152 .error_msg
153 .as_ref()
154 .map(|s| s.as_str().to_string())
155 .unwrap_or_else(|| format!("guest fn_id={fn_id} returned status {status}"));
156 metrics::counter!(
157 METRIC_GUEST_ERROR,
158 METRIC_GUEST_ERROR_LABEL_CODE => status.to_string()
159 )
160 .increment(1);
161 return Err(HyperlightError::GuestError(
162 hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode::GuestError,
163 msg,
164 ));
165 }
166
167 Ok(resp.payload.as_slice().to_vec())
168 })();
169
170 // Clear partial abort bytes so they don't leak across calls.
171 self.mem_mgr.abort_buffer.clear();
172
173 if res.is_err() {
174 self.mem_mgr.clear_io_buffers();
175 }
176
177 res
178 }
179
180 /// Publish a [`Cap`] into the guest's heap-resident cap
181 /// directory via the [`FN_ID_NUB_PUT_CAP`] RPC.
182 ///
183 /// Encodes `cap` as a [`WireCap`] (see `javm-cap`'s `wire`
184 /// module), ships it via [`Self::call_raw`], and reads back the
185 /// guest-computed `CapHash`. On the guest side, the cap is
186 /// inserted into the
187 /// `nub_arch_x86::state_cache::DIRECTORY` map, keyed by hash.
188 ///
189 /// Caps that can't be represented on the wire (e.g.
190 /// `DataContent::Paged`, `CNode` with `Ref`-typed slots, etc.)
191 /// fail at the wire conversion step with a typed error.
192 /// Encode/decode failures are surfaced as
193 /// `HyperlightError::Error`. A sentinel response (all-`0xFF`
194 /// hash) from the guest is also turned into an error.
195 pub fn put_cap(&mut self, cap: &Cap) -> Result<AbiCapHash> {
196 let wire = WireCap::from_cap(cap)
197 .map_err(|e| crate::new_error!("put_cap: wire conversion failed: {e}"))?;
198 let cap_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&wire)
199 .map_err(|e| crate::new_error!("put_cap: rkyv encode WireCap: {e}"))?;
200 let resp = self.call_raw(FN_ID_NUB_PUT_CAP, cap_bytes.as_slice())?;
201 if resp.len() != 32 {
202 return Err(crate::new_error!(
203 "put_cap: expected 32-byte hash response, got {}",
204 resp.len()
205 ));
206 }
207 let mut hash: AbiCapHash = [0u8; 32];
208 hash.copy_from_slice(&resp);
209 // Guest's `nub_put_cap` returns `0xFF * 32` on decode/conv
210 // failure. Surface as a typed error so callers don't observe
211 // a fake hash.
212 if hash == [0xFFu8; 32] {
213 return Err(crate::new_error!(
214 "put_cap: guest reported decode/conversion failure (sentinel response)"
215 ));
216 }
217 Ok(hash)
218 }
219
220 /// Pre-hashed put: idempotent fast path that short-circuits the
221 /// full [`Self::put_cap`] RPC when the guest's directory already
222 /// holds `hash`.
223 ///
224 /// Behaviour:
225 ///
226 /// - If `GuestCacheReader::contains(hash)` returns `true`,
227 /// return immediately — the guest already has the cap and we
228 /// skip rkyv encode + VMEXIT + guest decode + merkle walk +
229 /// directory insert. This is the hot path for bench loops that
230 /// re-publish the same cap graph every iteration.
231 /// - Otherwise, ship `put_cap(cap)`, then debug-assert the
232 /// returned hash matches `hash`.
233 ///
234 /// The reader is built lazily on first call (one `nub_get_boot_info`
235 /// RPC to read `BootInfo.directory_va`, then a single struct
236 /// construction); subsequent calls hit the cached reader.
237 pub fn put_cap_with_hash(&mut self, hash: AbiCapHash, cap: &Cap) -> Result<()> {
238 let exists = self.ensure_guest_cache_reader()?.contains(&hash);
239 if exists {
240 return Ok(());
241 }
242 let got = self.put_cap(cap)?;
243 debug_assert_eq!(
244 got, hash,
245 "put_cap_with_hash: guest-computed hash differs from claimed hash"
246 );
247 Ok(())
248 }
249
250 /// Lazily build the `GuestCacheReader`. Issues one
251 /// `nub_get_boot_info` RPC to read `BootInfo.directory_va`, then
252 /// constructs the reader; subsequent calls return the cached
253 /// reader without a roundtrip.
254 fn ensure_guest_cache_reader(&mut self) -> Result<&GuestCacheReader> {
255 if self.guest_cache_reader.is_none() {
256 let raw = self.call_raw(FN_ID_NUB_GET_BOOT_INFO, &[])?;
257 let expected = core::mem::size_of::<BootInfo>();
258 if raw.len() != expected {
259 return Err(crate::new_error!(
260 "nub_get_boot_info: expected {} bytes, got {}",
261 expected,
262 raw.len()
263 ));
264 }
265 // SAFETY: `BootInfo` is `#[repr(C)]` POD; the guest packs
266 // exactly `size_of::<BootInfo>()` bytes via
267 // `core::ptr::read` over its `static mut BOOT_INFO`. The
268 // host's matching layout comes from the same
269 // `nub-arch-x86-abi` crate.
270 let info: BootInfo =
271 unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const BootInfo) };
272 // SAFETY: `info.directory_va` was published by the guest
273 // after `init_directory_va`; the host has the guest's
274 // kernel image mmap'd at the same VA via the
275 // `install_snapshot_mapping` fixed-VA shadow, so the
276 // pointer is valid in the host's address space.
277 let reader = unsafe { GuestCacheReader::new(&info) }
278 .map_err(|e| crate::new_error!("guest_cache_reader: {e}"))?;
279 self.guest_cache_reader = Some(reader);
280 }
281 Ok(self.guest_cache_reader.as_ref().expect("set above"))
282 }
283
284 /// Returns a handle for interrupting guest execution.
285 pub fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> {
286 self.vm.interrupt_handle()
287 }
288
289 /// Generate a crash dump of the current state of the VM underlying this sandbox.
290 #[cfg(crashdump)]
291 #[instrument(err(Debug), skip_all, parent = Span::current())]
292 pub fn generate_crashdump(&mut self) -> Result<()> {
293 crate::hypervisor::crashdump::generate_crashdump(&self.vm, &mut self.mem_mgr, None)
294 }
295
296 /// Generate a crash dump of the current state of the VM, writing to `dir`.
297 #[cfg(crashdump)]
298 #[instrument(err(Debug), skip_all, parent = Span::current())]
299 pub fn generate_crashdump_to_dir(&mut self, dir: impl Into<String>) -> Result<()> {
300 crate::hypervisor::crashdump::generate_crashdump(
301 &self.vm,
302 &mut self.mem_mgr,
303 Some(dir.into()),
304 )
305 }
306}
307
308impl std::fmt::Debug for MultiUseSandbox {
309 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
310 f.debug_struct("MultiUseSandbox").finish()
311 }
312}