nub_host_kvm/mem/shared_mem.rs
1/*
2Copyright 2025 The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17use std::any::type_name;
18use std::ffi::c_void;
19use std::io::Error;
20use std::mem::{align_of, size_of};
21#[cfg(target_os = "linux")]
22use std::ptr::null_mut;
23use std::sync::{Arc, RwLock};
24
25use nub_host_common::mem::PAGE_SIZE_USIZE;
26use tracing::{Span, instrument};
27#[cfg(target_os = "windows")]
28use windows::Win32::Foundation::{CloseHandle, HANDLE, INVALID_HANDLE_VALUE};
29#[cfg(target_os = "windows")]
30use windows::Win32::System::Memory::PAGE_READWRITE;
31#[cfg(target_os = "windows")]
32use windows::Win32::System::Memory::{
33 CreateFileMappingA, FILE_MAP_ALL_ACCESS, MEMORY_MAPPED_VIEW_ADDRESS, MapViewOfFile,
34 PAGE_NOACCESS, PAGE_PROTECTION_FLAGS, UnmapViewOfFile, VirtualProtect,
35};
36#[cfg(target_os = "windows")]
37use windows::core::PCSTR;
38
39use super::memory_region::{
40 HostGuestMemoryRegion, MemoryRegion, MemoryRegionFlags, MemoryRegionKind, MemoryRegionType,
41};
42#[cfg(target_os = "windows")]
43use crate::HyperlightError::WindowsAPIError;
44use crate::{HyperlightError, Result, log_then_return, new_error};
45
/// Returns early with an error unless the range starting at `offset` and
/// spanning `size` bytes lies entirely inside a memory of `mem_size` bytes.
///
/// An `offset + size` that overflows is treated as out of bounds. The macro
/// expands to a `return Err(..)`, so it may only be used inside functions
/// whose error type can hold the value produced by `new_error!`.
macro_rules! bounds_check {
    ($offset:expr, $size:expr, $mem_size:expr) => {
        match $offset.checked_add($size) {
            // The end of the range was computable and is inside the memory.
            Some(end) if end <= $mem_size => {}
            // Either the addition overflowed or the range runs past the end.
            _ => {
                return Err(new_error!(
                    "Cannot read value from offset {} with size {} in memory of size {}",
                    $offset,
                    $size,
                    $mem_size
                ));
            }
        }
    };
}
59
/// Generates a method named `$fname` that reads a little-endian `$ty`
/// out of the bytes returned by `self.as_slice()`.
macro_rules! generate_reader {
    ($fname:ident, $ty:ty) => {
        /// Read a little-endian value from the memory at the given offset.
        ///
        /// Fails if the value would not fit entirely inside the memory.
        #[allow(dead_code)]
        #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
        pub(crate) fn $fname(&self, offset: usize) -> Result<$ty> {
            let width = std::mem::size_of::<$ty>();
            let bytes = self.as_slice();
            bounds_check!(offset, width, bytes.len());
            // The bounds check above guarantees `offset + width` cannot overflow.
            let end = offset + width;
            Ok(<$ty>::from_le_bytes(bytes[offset..end].try_into()?))
        }
    };
}
75
/// Generates a method named `$fname` that stores a `$ty` in little-endian
/// byte order into the bytes returned by `self.as_mut_slice()`.
macro_rules! generate_writer {
    ($fname:ident, $ty:ty) => {
        /// Write a value in little-endian byte order to the memory at the
        /// given offset.
        ///
        /// Fails, leaving the memory untouched, if the value would not fit
        /// entirely inside the memory.
        #[allow(dead_code)]
        pub(crate) fn $fname(&mut self, offset: usize, value: $ty) -> Result<()> {
            let encoded = value.to_le_bytes();
            let target = self.as_mut_slice();
            bounds_check!(offset, encoded.len(), target.len());
            // The bounds check above guarantees this range is valid.
            target[offset..offset + encoded.len()].copy_from_slice(&encoded);
            Ok(())
        }
    };
}
89
/// An owned host-side mapping of a shared memory region.
///
/// The mapping is released when this value is dropped. Because it
/// owns the mapping it is deliberately not `Clone`, and because it
/// provides no synchronization of its own it is neither `Send` nor
/// `Sync`.
#[derive(Debug)]
pub struct HostMapping {
    /// Address of the first byte of the mapping.
    ptr: *mut u8,
    /// Total size of the mapping in bytes.
    size: usize,
    /// Handle of the file mapping object that backs the view at `ptr`.
    #[cfg(target_os = "windows")]
    handle: HANDLE,
}
101
102impl Drop for HostMapping {
103 #[cfg(target_os = "linux")]
104 fn drop(&mut self) {
105 use libc::munmap;
106
107 unsafe {
108 munmap(self.ptr as *mut c_void, self.size);
109 }
110 }
111 #[cfg(target_os = "windows")]
112 fn drop(&mut self) {
113 let mem_mapped_address = MEMORY_MAPPED_VIEW_ADDRESS {
114 Value: self.ptr as *mut c_void,
115 };
116 if let Err(e) = unsafe { UnmapViewOfFile(mem_mapped_address) } {
117 tracing::error!(
118 "Failed to drop HostMapping (UnmapViewOfFile failed): {:?}",
119 e
120 );
121 }
122
123 let file_handle: HANDLE = self.handle;
124 if let Err(e) = unsafe { CloseHandle(file_handle) } {
125 tracing::error!("Failed to drop HostMapping (CloseHandle failed): {:?}", e);
126 }
127 }
128}
129
130/// These three structures represent various phases of the lifecycle of
131/// a memory buffer that is shared with the guest. An
132/// ExclusiveSharedMemory is used for certain operations that
133/// unrestrictedly write to the shared memory, including setting it up
134/// and taking snapshots.
135#[derive(Debug)]
136pub struct ExclusiveSharedMemory {
137 region: Arc<HostMapping>,
138}
139unsafe impl Send for ExclusiveSharedMemory {}
140
141/// A GuestSharedMemory is used to represent
142/// the reference to all-of-memory that is taken by the virtual cpu.
143/// Because of the memory model limitations that affect
144/// HostSharedMemory, it is likely fairly important (to ensure that
145/// our UB remains limited to interaction with an external compilation
146/// unit that likely can't be discovered by the compiler) that _rust_
147/// users do not perform racy accesses to the guest communication
148/// buffers that are also accessed by HostSharedMemory.
149#[derive(Debug)]
150pub struct GuestSharedMemory {
151 region: Arc<HostMapping>,
152 /// The lock that indicates this shared memory is being used by non-Rust code
153 ///
154 /// This lock _must_ be held whenever the guest is executing,
155 /// because it prevents the host from converting its
156 /// HostSharedMemory to an ExclusiveSharedMemory. Since the guest
157 /// may arbitrarily mutate the shared memory, only synchronized
158 /// accesses from Rust should be allowed!
159 ///
160 /// We cannot enforce this in the type system, because the memory
161 /// is mapped in to the VM at VM creation time.
162 pub lock: Arc<RwLock<()>>,
163}
164unsafe impl Send for GuestSharedMemory {}
165
166/// A HostSharedMemory allows synchronized accesses to guest
167/// communication buffers, allowing it to be used concurrently with a
168/// GuestSharedMemory.
169///
170/// # Concurrency model
171///
172/// Given future requirements for asynchronous I/O with a minimum
173/// amount of copying (e.g. WASIp3 streams), we would like it to be
174/// possible to safely access these buffers concurrently with the
175/// guest, ensuring that (1) data is read appropriately if the guest
176/// is well-behaved; and (2) the host's behaviour is defined
177/// regardless of whether or not the guest is well-behaved.
178///
179/// The ideal (future) flow for a guest->host message is something like
180/// - Guest writes (unordered) bytes describing a work item into a buffer
181/// - Guest reveals buffer via a release-store of a pointer into an
182/// MMIO ring-buffer
183/// - Host acquire-loads the buffer pointer from the "MMIO" ring
184/// buffer
185/// - Host (unordered) reads the bytes from the buffer
186/// - Host performs validation of those bytes and uses them
187///
188/// Unfortunately, there appears to be no way to do this with defined
189/// behaviour in present Rust (see
190/// e.g. <https://github.com/rust-lang/unsafe-code-guidelines/issues/152>).
191/// Rust does not yet have its own defined memory model, but in the
192/// interim, it is widely treated as inheriting the current C/C++
193/// memory models. The most immediate problem is that regardless of
194/// anything else, under those memory models \[1, p. 17-18; 2, p. 88\],
195///
196/// > The execution of a program contains a _data race_ if it
197/// > contains two [C++23: "potentially concurrent"] conflicting
198/// > actions [C23: "in different threads"], at least one of which
199/// > is not atomic, and neither happens before the other [C++23: ",
200/// > except for the special case for signal handlers described
201/// > below"]. Any such data race results in undefined behavior.
202///
203/// Consequently, if a misbehaving guest fails to correctly
204/// synchronize its stores with the host, the host's innocent loads
205/// will trigger undefined behaviour for the entire program, including
206/// the host. Note that this also applies if the guest makes an
207/// unsynchronized read of a location that the host is writing!
208///
209/// Despite Rust's de jure inheritance of the C memory model at the
210/// present time, the compiler in many cases de facto adheres to LLVM
211/// semantics, so it is worthwhile to consider what LLVM does in this
212/// case as well. According to the LangRef \[3\] memory model,
213/// loads which are involved in a race that includes at least one
214/// non-atomic access (whether the load or a store) return `undef`,
215/// making them roughly equivalent to reading uninitialized
216/// memory. While this is much better, it is still bad.
217///
218/// Considering a different direction, recent C++ papers have seemed
219/// to lean towards using `volatile` for similar use cases. For
220/// example, in P1152R0 \[4\], JF Bastien notes that
221///
222/// > We’ve shown that volatile is purposely defined to denote
223/// > external modifications. This happens for:
224/// > - Shared memory with untrusted code, where volatile is the
225/// > right way to avoid time-of-check time-of-use (ToCToU)
226/// > races which lead to security bugs such as \[PWN2OWN\] and
227/// > \[XENXSA155\].
228///
229/// Unfortunately, although this paper was adopted for C++20 (and,
230/// sadly, mostly un-adopted for C++23, although that does not concern
231/// us), the paper did not actually redefine volatile accesses or data
232/// races to prevent volatile accesses from racing with other accesses
233/// and causing undefined behaviour. P1382R1 \[5\] would have amended
234/// the wording of the data race definition to specifically exclude
235/// volatile, but, unfortunately, despite receiving a
236/// generally-positive reception at its first WG21 meeting more than
237/// five years ago, it has not progressed.
238///
239/// Separately from the data race issue, there is also a concern that
240/// according to the various memory models in use, there may be ways
241/// in which the guest can semantically obtain uninitialized memory
242/// and write it into the shared buffer, which may also result in
243/// undefined behaviour on reads. The degree to which this is a
244/// concern is unclear, however, since it is unclear to what degree
245/// the Rust abstract machine's conception of uninitialized memory
246/// applies to the sandbox. Returning briefly to the LLVM level,
247/// rather than the Rust level, this, combined with the fact that
248/// racing loads in LLVM return `undef`, as discussed above, we would
249/// ideally `llvm.freeze` the result of any load out of the sandbox.
250///
251/// It would furthermore be ideal if we could run the flatbuffers
252/// parsing code directly on the guest memory, in order to avoid
253/// unnecessary copies. That is unfortunately probably not viable at
254/// the present time: because the generated flatbuffers parsing code
255/// doesn't use atomic or volatile accesses, it is likely to introduce
256/// double-read vulnerabilities.
257///
258/// In short, none of the Rust-level operations available to us do the
259/// right thing, at the Rust spec level or the LLVM spec level. Our
260/// major remaining options are therefore:
261/// - Choose one of the options that is available to us, and accept
262/// that we are doing something unsound according to the spec, but
263/// hope that no reasonable compiler could possibly notice.
264/// - Use inline assembly per architecture, for which we would only
265/// need to worry about the _architecture_'s memory model (which
266/// is far less demanding).
267///
268/// The leading candidate for the first option would seem to be to
269/// simply use volatile accesses; there seems to be wide agreement
270/// that this _should_ be a valid use case for them (even if it isn't
271/// now), and projects like Linux and rust-vmm already use C11
272/// `volatile` for this purpose. It is also worth noting that because
273/// we still do need to synchronize with the guest when it _is_ being
274/// well-behaved, we would ideally use volatile acquire loads and
275/// volatile release stores for interacting with the stack pointer in
276/// the guest in this case. Unfortunately, while those operations are
277/// defined in LLVM, they are not presently exposed to Rust. While
278/// atomic fences that are not associated with memory accesses
279/// ([`std::sync::atomic::fence`]) might at first glance seem to help with
280/// this problem, they unfortunately do not \[6\]:
281///
282/// > A fence ‘A’ which has (at least) Release ordering semantics,
283/// > synchronizes with a fence ‘B’ with (at least) Acquire
284/// > semantics, if and only if there exist operations X and Y,
285/// > both operating on some atomic object ‘M’ such that A is
286/// > sequenced before X, Y is sequenced before B and Y observes
287/// > the change to M. This provides a happens-before dependence
288/// > between A and B.
289///
290/// Note that the X and Y must be to an _atomic_ object.
291///
292/// We consequently assume that there has been a strong architectural
293/// fence on a vmenter/vmexit between data being read and written.
294/// This is unsafe (not guaranteed in the type system)!
295///
296/// \[1\] N3047 C23 Working Draft. <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3047.pdf>
297/// \[2\] N4950 C++23 Working Draft. <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/n4950.pdf>
298/// \[3\] LLVM Language Reference Manual, Memory Model for Concurrent Operations. <https://llvm.org/docs/LangRef.html#memmodel>
299/// \[4\] P1152R0: Deprecating `volatile`. JF Bastien. <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p1152r0.html>
300/// \[5\] P1382R1: `volatile_load<T>` and `volatile_store<T>`. JF Bastien, Paul McKenney, Jeffrey Yasskin, and the indefatigable TBD. <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1382r1.pdf>
301/// \[6\] Documentation for std::sync::atomic::fence. <https://doc.rust-lang.org/std/sync/atomic/fn.fence.html>
302///
303/// # Note \[Keeping mappings in sync between userspace and the guest\]
304///
305/// When using this structure with mshv on Linux, it is necessary to
306/// be a little bit careful: since the hypervisor is not directly
307/// integrated with the host kernel virtual memory subsystem, it is
308/// easy for the memory region in userspace to get out of sync with
309/// the memory region mapped into the guest. Generally speaking, when
310/// the [`SharedMemory`] is mapped into a partition, the MSHV kernel
311/// module will call `pin_user_pages(FOLL_PIN|FOLL_WRITE)` on it,
312/// which will eagerly do any CoW, etc needing to obtain backing pages
313/// pinned in memory, and then map precisely those backing pages into
314/// the virtual machine. After that, the backing pages mapped into the
315/// VM will not change until the region is unmapped or remapped. This
316/// means that code in this module needs to be very careful to avoid
317/// changing the backing pages of the region in the host userspace,
318/// since that would result in hyperlight-host's view of the memory
319/// becoming completely divorced from the view of the VM.
320#[derive(Clone, Debug)]
321pub struct HostSharedMemory {
322 region: Arc<HostMapping>,
323 lock: Arc<RwLock<()>>,
324}
325unsafe impl Send for HostSharedMemory {}
326
327impl ExclusiveSharedMemory {
328 /// Create a new region of shared memory with the given minimum
329 /// size in bytes. The region will be surrounded by guard pages.
330 ///
331 /// Return `Err` if shared memory could not be allocated.
332 #[cfg(target_os = "linux")]
333 #[instrument(skip_all, parent = Span::current(), level= "Trace")]
334 pub fn new(min_size_bytes: usize) -> Result<Self> {
335 use libc::{
336 MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ, PROT_WRITE, c_int, mmap, off_t,
337 size_t,
338 };
339 #[cfg(not(miri))]
340 use libc::{MAP_NORESERVE, PROT_NONE, mprotect};
341
342 if min_size_bytes == 0 {
343 return Err(new_error!("Cannot create shared memory with size 0"));
344 }
345
346 let total_size = min_size_bytes
347 .checked_add(2 * PAGE_SIZE_USIZE) // guard page around the memory
348 .ok_or_else(|| new_error!("Memory required for sandbox exceeded usize::MAX"))?;
349
350 if total_size % PAGE_SIZE_USIZE != 0 {
351 return Err(new_error!(
352 "shared memory must be a multiple of {}",
353 PAGE_SIZE_USIZE
354 ));
355 }
356
357 // usize and isize are guaranteed to be the same size, and
358 // isize::MAX should be positive, so this cast should be safe.
359 if total_size > isize::MAX as usize {
360 return Err(HyperlightError::MemoryRequestTooBig(
361 total_size,
362 isize::MAX as usize,
363 ));
364 }
365
366 // allocate the memory
367 #[cfg(not(miri))]
368 let flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
369 #[cfg(miri)]
370 let flags = MAP_ANONYMOUS | MAP_PRIVATE;
371
372 let addr = unsafe {
373 mmap(
374 null_mut(),
375 total_size as size_t,
376 PROT_READ | PROT_WRITE,
377 flags,
378 -1 as c_int,
379 0 as off_t,
380 )
381 };
382 if addr == MAP_FAILED {
383 log_then_return!(HyperlightError::MmapFailed(
384 Error::last_os_error().raw_os_error()
385 ));
386 }
387
388 // protect the guard pages
389 #[cfg(not(miri))]
390 {
391 let res = unsafe { mprotect(addr, PAGE_SIZE_USIZE, PROT_NONE) };
392 if res != 0 {
393 return Err(HyperlightError::MprotectFailed(
394 Error::last_os_error().raw_os_error(),
395 ));
396 }
397 let res = unsafe {
398 mprotect(
399 (addr as *const u8).add(total_size - PAGE_SIZE_USIZE) as *mut c_void,
400 PAGE_SIZE_USIZE,
401 PROT_NONE,
402 )
403 };
404 if res != 0 {
405 return Err(HyperlightError::MprotectFailed(
406 Error::last_os_error().raw_os_error(),
407 ));
408 }
409 }
410
411 Ok(Self {
412 // HostMapping is only non-Send/Sync because raw pointers
413 // are not ("as a lint", as the Rust docs say). We don't
414 // want to mark HostMapping Send/Sync immediately, because
415 // that could socially imply that it's "safe" to use
416 // unsafe accesses from multiple threads at once. Instead, we
417 // directly impl Send and Sync on this type. Since this
418 // type does have Send and Sync manually impl'd, the Arc
419 // is not pointless as the lint suggests.
420 #[allow(clippy::arc_with_non_send_sync)]
421 region: Arc::new(HostMapping {
422 ptr: addr as *mut u8,
423 size: total_size,
424 }),
425 })
426 }
427
428 /// Create a new region of shared memory with the given minimum
429 /// size in bytes. The region will be surrounded by guard pages.
430 ///
431 /// Return `Err` if shared memory could not be allocated.
432 #[cfg(target_os = "windows")]
433 #[instrument(skip_all, parent = Span::current(), level= "Trace")]
434 pub fn new(min_size_bytes: usize) -> Result<Self> {
435 if min_size_bytes == 0 {
436 return Err(new_error!("Cannot create shared memory with size 0"));
437 }
438
439 let total_size = min_size_bytes
440 .checked_add(2 * PAGE_SIZE_USIZE)
441 .ok_or_else(|| new_error!("Memory required for sandbox exceeded {}", usize::MAX))?;
442
443 if total_size % PAGE_SIZE_USIZE != 0 {
444 return Err(new_error!(
445 "shared memory must be a multiple of {}",
446 PAGE_SIZE_USIZE
447 ));
448 }
449
450 // usize and isize are guaranteed to be the same size, and
451 // isize::MAX should be positive, so this cast should be safe.
452 if total_size > isize::MAX as usize {
453 return Err(HyperlightError::MemoryRequestTooBig(
454 total_size,
455 isize::MAX as usize,
456 ));
457 }
458
459 let mut dwmaximumsizehigh = 0;
460 let mut dwmaximumsizelow = 0;
461
462 if std::mem::size_of::<usize>() == 8 {
463 dwmaximumsizehigh = (total_size >> 32) as u32;
464 dwmaximumsizelow = (total_size & 0xFFFFFFFF) as u32;
465 }
466
467 // Allocate the memory use CreateFileMapping instead of VirtualAlloc
468 // This allows us to map the memory into the surrogate process using MapViewOfFile2
469
470 let flags = PAGE_READWRITE;
471
472 let handle = unsafe {
473 CreateFileMappingA(
474 INVALID_HANDLE_VALUE,
475 None,
476 flags,
477 dwmaximumsizehigh,
478 dwmaximumsizelow,
479 PCSTR::null(),
480 )?
481 };
482
483 if handle.is_invalid() {
484 log_then_return!(HyperlightError::MemoryAllocationFailed(
485 Error::last_os_error().raw_os_error()
486 ));
487 }
488
489 let file_map = FILE_MAP_ALL_ACCESS;
490 let addr = unsafe { MapViewOfFile(handle, file_map, 0, 0, 0) };
491
492 if addr.Value.is_null() {
493 log_then_return!(HyperlightError::MemoryAllocationFailed(
494 Error::last_os_error().raw_os_error()
495 ));
496 }
497
498 // Set the first and last pages to be guard pages
499
500 let mut unused_out_old_prot_flags = PAGE_PROTECTION_FLAGS(0);
501
502 // If the following calls to VirtualProtect are changed make sure to update the calls to VirtualProtectEx in surrogate_process_manager.rs
503
504 let first_guard_page_start = addr.Value;
505 if let Err(e) = unsafe {
506 VirtualProtect(
507 first_guard_page_start,
508 PAGE_SIZE_USIZE,
509 PAGE_NOACCESS,
510 &mut unused_out_old_prot_flags,
511 )
512 } {
513 log_then_return!(WindowsAPIError(e.clone()));
514 }
515
516 let last_guard_page_start = unsafe { addr.Value.add(total_size - PAGE_SIZE_USIZE) };
517 if let Err(e) = unsafe {
518 VirtualProtect(
519 last_guard_page_start,
520 PAGE_SIZE_USIZE,
521 PAGE_NOACCESS,
522 &mut unused_out_old_prot_flags,
523 )
524 } {
525 log_then_return!(WindowsAPIError(e.clone()));
526 }
527
528 Ok(Self {
529 // HostMapping is only non-Send/Sync because raw pointers
530 // are not ("as a lint", as the Rust docs say). We don't
531 // want to mark HostMapping Send/Sync immediately, because
532 // that could socially imply that it's "safe" to use
533 // unsafe accesses from multiple threads at once. Instead, we
534 // directly impl Send and Sync on this type. Since this
535 // type does have Send and Sync manually impl'd, the Arc
536 // is not pointless as the lint suggests.
537 #[allow(clippy::arc_with_non_send_sync)]
538 region: Arc::new(HostMapping {
539 ptr: addr.Value as *mut u8,
540 size: total_size,
541 handle,
542 }),
543 })
544 }
545
546 /// Internal helper method to get the backing memory as a mutable slice.
547 ///
548 /// # Safety
549 /// As per std::slice::from_raw_parts_mut:
550 /// - self.base_addr() must be valid for both reads and writes for
551 /// self.mem_size() * mem::size_of::<u8>() many bytes, and it
552 /// must be properly aligned.
553 ///
554 /// The rules on validity are still somewhat unspecified, but we
555 /// assume that the result of our calls to mmap/CreateFileMappings may
556 /// be considered a single "allocated object". The use of
557 /// non-atomic accesses is alright from a Safe Rust standpoint,
558 /// because SharedMemoryBuilder is not Sync.
559 /// - self.base_addr() must point to self.mem_size() consecutive
560 /// properly initialized values of type u8
561 ///
562 /// Again, the exact provenance restrictions on what is
563 /// considered to be initialized values are unclear, but we make
564 /// sure to use mmap(MAP_ANONYMOUS) and
565 /// CreateFileMapping(SEC_COMMIT), so the pages in question are
566 /// zero-initialized, which we hope counts for u8.
567 /// - The memory referenced by the returned slice must not be
568 /// accessed through any other pointer (not derived from the
569 /// return value) for the duration of the lifetime 'a. Both read
570 /// and write accesses are forbidden.
571 ///
572 /// Accesses from Safe Rust necessarily follow this rule,
573 /// because the returned slice's lifetime is the same as that of
574 /// a mutable borrow of self.
575 /// - The total size self.mem_size() * mem::size_of::<u8>() of the
576 /// slice must be no larger than isize::MAX, and adding that
577 /// size to data must not "wrap around" the address space. See
578 /// the safety documentation of pointer::offset.
579 ///
580 /// This is ensured by a check in ::new()
581 pub(super) fn as_mut_slice(&mut self) -> &mut [u8] {
582 unsafe { std::slice::from_raw_parts_mut(self.base_ptr(), self.mem_size()) }
583 }
584
585 /// Internal helper method to get the backing memory as a slice.
586 ///
587 /// # Safety
588 /// See the discussion on as_mut_slice, with the third point
589 /// replaced by:
590 /// - The memory referenced by the returned slice must not be
591 /// mutated for the duration of lifetime 'a, except inside an
592 /// UnsafeCell.
593 ///
594 /// Host accesses from Safe Rust necessarily follow this rule,
595 /// because the returned slice's lifetime is the same as that of
596 /// a borrow of self, preventing mutations via other methods.
597 #[instrument(skip_all, parent = Span::current(), level= "Trace")]
598 pub fn as_slice<'a>(&'a self) -> &'a [u8] {
599 unsafe { std::slice::from_raw_parts(self.base_ptr(), self.mem_size()) }
600 }
601
602 /// Copies all bytes from `src` to `self` starting at offset
603 #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
604 pub fn copy_from_slice(&mut self, src: &[u8], offset: usize) -> Result<()> {
605 let data = self.as_mut_slice();
606 bounds_check!(offset, src.len(), data.len());
607 data[offset..offset + src.len()].copy_from_slice(src);
608 Ok(())
609 }
610
611 generate_reader!(read_u8, u8);
612 generate_reader!(read_i8, i8);
613 generate_reader!(read_u16, u16);
614 generate_reader!(read_i16, i16);
615 generate_reader!(read_u32, u32);
616 generate_reader!(read_i32, i32);
617 generate_reader!(read_u64, u64);
618 generate_reader!(read_i64, i64);
619 generate_reader!(read_usize, usize);
620 generate_reader!(read_isize, isize);
621
622 generate_writer!(write_u8, u8);
623 generate_writer!(write_i8, i8);
624 generate_writer!(write_u16, u16);
625 generate_writer!(write_i16, i16);
626 generate_writer!(write_u32, u32);
627 generate_writer!(write_i32, i32);
628 generate_writer!(write_u64, u64);
629 generate_writer!(write_i64, i64);
630 generate_writer!(write_usize, usize);
631 generate_writer!(write_isize, isize);
632
633 /// Convert the ExclusiveSharedMemory, which may be freely
634 /// modified, into a GuestSharedMemory, which may be somewhat
635 /// freely modified (mostly by the guest), and a HostSharedMemory,
636 /// which may only make certain kinds of accesses that do not race
637 /// in the presence of malicious code inside the guest mutating
638 /// the GuestSharedMemory.
639 pub fn build(self) -> (HostSharedMemory, GuestSharedMemory) {
640 let lock = Arc::new(RwLock::new(()));
641 let hshm = HostSharedMemory {
642 region: self.region.clone(),
643 lock: lock.clone(),
644 };
645 (
646 hshm,
647 GuestSharedMemory {
648 region: self.region.clone(),
649 lock,
650 },
651 )
652 }
653
654 /// Gets the file handle of the shared memory region for this Sandbox
655 #[cfg(target_os = "windows")]
656 pub fn get_mmap_file_handle(&self) -> HANDLE {
657 self.region.handle
658 }
659
660 /// Create a [`HostSharedMemory`] view of this region without
661 /// consuming `self`. Used in tests where the full `build()` /
662 /// `evolve()` pipeline is not available.
663 #[cfg(all(test, feature = "guest-counter"))]
664 pub(crate) fn as_host_shared_memory(&self) -> HostSharedMemory {
665 let lock = Arc::new(RwLock::new(()));
666 HostSharedMemory {
667 region: self.region.clone(),
668 lock,
669 }
670 }
671}
672
673fn mapping_at(
674 s: &impl SharedMemory,
675 gpa: u64,
676 size: usize,
677 region_type: MemoryRegionType,
678 flags: MemoryRegionFlags,
679) -> MemoryRegion {
680 let guest_base = gpa as usize;
681
682 MemoryRegion {
683 guest_region: guest_base..(guest_base + size),
684 host_region: s.host_region_base()
685 ..<HostGuestMemoryRegion as MemoryRegionKind>::add(s.host_region_base(), size),
686 region_type,
687 flags,
688 }
689}
690
691impl GuestSharedMemory {
692 /// Create a [`super::memory_region::MemoryRegion`] structure
693 /// suitable for mapping this region into a VM
694 pub(crate) fn mapping_at(
695 &self,
696 guest_base: u64,
697 region_type: MemoryRegionType,
698 ) -> MemoryRegion {
699 let flags = match region_type {
700 MemoryRegionType::Scratch => {
701 MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE
702 }
703 #[cfg(unshared_snapshot_mem)]
704 MemoryRegionType::Snapshot => {
705 MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE
706 }
707 #[allow(clippy::panic)]
708 // This will not ever actually panic: the only places this
709 // is called are HyperlightVm::update_snapshot_mapping and
710 // HyperlightVm::update_scratch_mapping. The latter
711 // statically uses the Scratch region type, and the former
712 // does not use this at all when the unshared_snapshot_mem
713 // feature is not set, since in that case the scratch
714 // mapping type is ReadonlySharedMemory, not
715 // GuestSharedMemory.
716 _ => panic!(
717 "GuestSharedMemory::mapping_at should only be used for Scratch or Snapshot regions"
718 ),
719 };
720 mapping_at(self, guest_base, self.mem_size(), region_type, flags)
721 }
722}
723
724/// A trait that abstracts over the particular kind of SharedMemory,
725/// used when invoking operations from Rust that absolutely must have
726/// exclusive control over the shared memory for correctness +
727/// performance, like snapshotting.
728pub trait SharedMemory {
729 /// Return a readonly reference to the host mapping backing this SharedMemory
730 fn region(&self) -> &HostMapping;
731
732 /// Return the base address of the host mapping of this
733 /// region. Following the general Rust philosophy, this does not
734 /// need to be marked as `unsafe` because doing anything with this
735 /// pointer itself requires `unsafe`.
736 fn base_addr(&self) -> usize {
737 self.region().ptr as usize + PAGE_SIZE_USIZE
738 }
739
740 /// Return the base address of the host mapping of this region as
741 /// a pointer. Following the general Rust philosophy, this does
742 /// not need to be marked as `unsafe` because doing anything with
743 /// this pointer itself requires `unsafe`.
744 fn base_ptr(&self) -> *mut u8 {
745 self.region().ptr.wrapping_add(PAGE_SIZE_USIZE)
746 }
747
748 /// Return the length of usable memory contained in `self`.
749 /// The returned size does not include the size of the surrounding
750 /// guard pages.
751 fn mem_size(&self) -> usize {
752 self.region().size - 2 * PAGE_SIZE_USIZE
753 }
754
755 /// Return the raw base address of the host mapping, including the
756 /// guard pages.
757 fn raw_ptr(&self) -> *mut u8 {
758 self.region().ptr
759 }
760
761 /// Return the raw size of the host mapping, including the guard
762 /// pages.
763 fn raw_mem_size(&self) -> usize {
764 self.region().size
765 }
766
767 /// Extract a base address that can be mapped into a VM for this
768 /// SharedMemory.
769 ///
770 /// On Linux this returns a raw `usize` pointer. On Windows it
771 /// returns a `HostRegionBase` (see `super::memory_region`)
772 /// that carries the file-mapping handle metadata needed by WHP.
773 fn host_region_base(&self) -> <HostGuestMemoryRegion as MemoryRegionKind>::HostBaseType {
774 #[cfg(not(windows))]
775 {
776 self.base_addr()
777 }
778 #[cfg(windows)]
779 {
780 super::memory_region::HostRegionBase {
781 from_handle: self.region().handle.into(),
782 handle_base: self.region().ptr as usize,
783 handle_size: self.region().size,
784 offset: PAGE_SIZE_USIZE,
785 }
786 }
787 }
788
789 /// Return the end address of the host region (base + usable size).
790 fn host_region_end(&self) -> <HostGuestMemoryRegion as MemoryRegionKind>::HostBaseType {
791 <HostGuestMemoryRegion as MemoryRegionKind>::add(self.host_region_base(), self.mem_size())
792 }
793
/// Run some code with exclusive access to the SharedMemory
/// underlying this. If the SharedMemory is not an
/// ExclusiveSharedMemory, any concurrent accesses to the relevant
/// HostSharedMemory/GuestSharedMemory may make this fail, or be
/// made to fail by this, and should be avoided.
///
/// `f` receives the memory as an `ExclusiveSharedMemory`; in the
/// implementations in this file its return value is handed back
/// wrapped in `Ok`.
///
/// # Errors
/// Each implementation decides when exclusivity cannot be granted.
/// In this file, the host/guest implementations fail when the write
/// lock cannot be taken without blocking, and the read-only
/// implementation always fails.
fn with_exclusivity<T, F: FnOnce(&mut ExclusiveSharedMemory) -> T>(
    &mut self,
    f: F,
) -> Result<T>;
803
804 /// Run some code that is allowed to access the contents of the
805 /// SharedMemory as if it is a normal slice. By default, this is
806 /// implemented via [`SharedMemory::with_exclusivity`], which is
807 /// the correct implementation for a memory that can be mutated,
808 /// but a [`ReadonlySharedMemory`], can support this.
809 fn with_contents<T, F: FnOnce(&[u8]) -> T>(&mut self, f: F) -> Result<T> {
810 self.with_exclusivity(|m| f(m.as_slice()))
811 }
812
813 /// Zero a shared memory region
814 fn zero(&mut self) -> Result<()> {
815 self.with_exclusivity(|e| {
816 #[allow(unused_mut)] // unused on some platforms, although not others
817 let mut do_copy = true;
818 // TODO: Compare & add heuristic thresholds: mmap, MADV_DONTNEED, MADV_REMOVE, MADV_FREE (?)
819 // TODO: Find a similar lazy zeroing approach that works on MSHV.
820 // (See Note [Keeping mappings in sync between userspace and the guest])
821 #[cfg(all(target_os = "linux", feature = "kvm", not(any(feature = "mshv3"))))]
822 unsafe {
823 let ret = libc::madvise(
824 e.region.ptr as *mut libc::c_void,
825 e.region.size,
826 libc::MADV_DONTNEED,
827 );
828 if ret == 0 {
829 do_copy = false;
830 }
831 }
832 if do_copy {
833 e.as_mut_slice().fill(0);
834 }
835 })
836 }
837}
838
839impl SharedMemory for ExclusiveSharedMemory {
840 fn region(&self) -> &HostMapping {
841 &self.region
842 }
843 fn with_exclusivity<T, F: FnOnce(&mut ExclusiveSharedMemory) -> T>(
844 &mut self,
845 f: F,
846 ) -> Result<T> {
847 Ok(f(self))
848 }
849}
850
851impl SharedMemory for GuestSharedMemory {
852 fn region(&self) -> &HostMapping {
853 &self.region
854 }
855 fn with_exclusivity<T, F: FnOnce(&mut ExclusiveSharedMemory) -> T>(
856 &mut self,
857 f: F,
858 ) -> Result<T> {
859 let guard = self
860 .lock
861 .try_write()
862 .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
863 let mut excl = ExclusiveSharedMemory {
864 region: self.region.clone(),
865 };
866 let ret = f(&mut excl);
867 drop(excl);
868 drop(guard);
869 Ok(ret)
870 }
871}
872
/// Marker trait for types for which every bit pattern is a valid
/// value. Reading a value of some type out of the sandbox through
/// the HostSharedMemory is only safe when the type has this
/// property.
///
/// # Safety
/// Implement this only for types for which all bit patterns are
/// valid: any (non-undef/poison) value of the correct size must be
/// transmutable to the type.
pub unsafe trait AllValid {}

/// Implements [`AllValid`] for every type in the list.
macro_rules! impl_all_valid {
    ($($ty:ty),+ $(,)?) => {
        $(
            unsafe impl AllValid for $ty {}
        )+
    };
}

// SAFETY: every bit pattern of a fixed-width integer, and of an array
// of `u8`, is a valid value of that type.
impl_all_valid!(u8, u16, u32, u64, i8, i16, i32, i64, [u8; 16]);
891
892impl HostSharedMemory {
893 /// Read a value of type T, whose representation is the same
894 /// between the sandbox and the host, and which has no invalid bit
895 /// patterns
896 pub fn read<T: AllValid>(&self, offset: usize) -> Result<T> {
897 bounds_check!(offset, std::mem::size_of::<T>(), self.mem_size());
898 unsafe {
899 let mut ret: core::mem::MaybeUninit<T> = core::mem::MaybeUninit::uninit();
900 {
901 let slice: &mut [u8] = core::slice::from_raw_parts_mut(
902 ret.as_mut_ptr() as *mut u8,
903 std::mem::size_of::<T>(),
904 );
905 self.copy_to_slice(slice, offset)?;
906 }
907 Ok(ret.assume_init())
908 }
909 }
910
911 /// Write a value of type T, whose representation is the same
912 /// between the sandbox and the host, and which has no invalid bit
913 /// patterns
914 pub fn write<T: AllValid>(&self, offset: usize, data: T) -> Result<()> {
915 bounds_check!(offset, std::mem::size_of::<T>(), self.mem_size());
916 unsafe {
917 let slice: &[u8] = core::slice::from_raw_parts(
918 core::ptr::addr_of!(data) as *const u8,
919 std::mem::size_of::<T>(),
920 );
921 self.copy_from_slice(slice, offset)?;
922 }
923 Ok(())
924 }
925
/// Copy bytes out of the sandbox memory, starting at `offset`, into
/// `slice`. Exactly `slice.len()` bytes are copied; `offset` is
/// relative to the start of the usable memory.
///
/// The shared memory is only ever accessed through volatile reads:
/// byte-wise until the source address is `u128`-aligned, then in
/// `u128` chunks, then byte-wise for whatever is left.
///
/// # Errors
/// Fails if `offset + slice.len()` does not fit inside the usable
/// memory, or if the read lock cannot be taken without blocking.
pub fn copy_to_slice(&self, slice: &mut [u8], offset: usize) -> Result<()> {
    bounds_check!(offset, slice.len(), self.mem_size());
    let base = self.base_ptr().wrapping_add(offset);
    // The read lock is tried, not waited for, and held for the whole copy.
    let guard = self
        .lock
        .try_read()
        .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;

    const CHUNK: usize = size_of::<u128>();
    let len = slice.len();
    let mut i = 0;

    // Handle unaligned head bytes until we reach u128 alignment.
    // Note: align_offset can return usize::MAX if alignment is impossible.
    // In that case, head_len = len via .min(), so we fall back to byte-by-byte
    // operations for the entire slice.
    let align_offset = base.align_offset(align_of::<u128>());
    let head_len = align_offset.min(len);
    while i < head_len {
        unsafe {
            slice[i] = base.add(i).read_volatile();
        }
        i += 1;
    }

    // Read aligned u128 chunks
    // SAFETY: After processing head_len bytes, base.add(i) is u128-aligned.
    // We use write_unaligned for the destination since the slice may not be u128-aligned.
    let dst = slice.as_mut_ptr();
    while i + CHUNK <= len {
        unsafe {
            let value = (base.add(i) as *const u128).read_volatile();
            std::ptr::write_unaligned(dst.add(i) as *mut u128, value);
        }
        i += CHUNK;
    }

    // Handle remaining tail bytes
    while i < len {
        unsafe {
            slice[i] = base.add(i).read_volatile();
        }
        i += 1;
    }

    drop(guard);
    Ok(())
}
976
/// Copy the contents of `slice` into the sandbox memory, starting
/// at `offset`. Exactly `slice.len()` bytes are written; `offset` is
/// relative to the start of the usable memory.
///
/// The shared memory is only ever accessed through volatile writes:
/// byte-wise until the destination address is `u128`-aligned, then
/// in `u128` chunks, then byte-wise for whatever is left.
///
/// # Errors
/// Fails if `offset + slice.len()` does not fit inside the usable
/// memory, or if the read lock cannot be taken without blocking.
pub fn copy_from_slice(&self, slice: &[u8], offset: usize) -> Result<()> {
    bounds_check!(offset, slice.len(), self.mem_size());
    let base = self.base_ptr().wrapping_add(offset);
    // Note that this takes the *read* side of the lock even though it
    // writes to the memory; the lock is tried, not waited for.
    let guard = self
        .lock
        .try_read()
        .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;

    const CHUNK: usize = size_of::<u128>();
    let len = slice.len();
    let mut i = 0;

    // Handle unaligned head bytes until we reach u128 alignment.
    // Note: align_offset can return usize::MAX if alignment is impossible.
    // In that case, head_len = len via .min(), so we fall back to byte-by-byte
    // operations for the entire slice.
    let align_offset = base.align_offset(align_of::<u128>());
    let head_len = align_offset.min(len);
    while i < head_len {
        unsafe {
            base.add(i).write_volatile(slice[i]);
        }
        i += 1;
    }

    // Write aligned u128 chunks
    // SAFETY: After processing head_len bytes, base.add(i) is u128-aligned.
    // We use read_unaligned for the source since the slice may not be u128-aligned.
    let src = slice.as_ptr();
    while i + CHUNK <= len {
        unsafe {
            let value = std::ptr::read_unaligned(src.add(i) as *const u128);
            (base.add(i) as *mut u128).write_volatile(value);
        }
        i += CHUNK;
    }

    // Handle remaining tail bytes
    while i < len {
        unsafe {
            base.add(i).write_volatile(slice[i]);
        }
        i += 1;
    }

    drop(guard);
    Ok(())
}
1027
/// Fill the memory in the range `[offset, offset + len)` with `value`.
///
/// `offset` is relative to the start of the usable memory. The
/// shared memory is only ever accessed through volatile writes:
/// byte-wise until the address is `u128`-aligned, then in `u128`
/// chunks made of sixteen copies of `value`, then byte-wise for
/// whatever is left.
///
/// # Errors
/// Fails if `offset + len` does not fit inside the usable memory, or
/// if the read lock cannot be taken without blocking.
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
pub fn fill(&mut self, value: u8, offset: usize, len: usize) -> Result<()> {
    bounds_check!(offset, len, self.mem_size());
    let base = self.base_ptr().wrapping_add(offset);
    // Note that this takes the *read* side of the lock even though it
    // writes to the memory; the lock is tried, not waited for.
    let guard = self
        .lock
        .try_read()
        .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;

    const CHUNK: usize = size_of::<u128>();
    // Every byte of the chunk is `value`, so byte order is irrelevant.
    let value_u128 = u128::from_ne_bytes([value; CHUNK]);
    let mut i = 0;

    // Handle unaligned head bytes until we reach u128 alignment.
    // Note: align_offset can return usize::MAX if alignment is impossible.
    // In that case, head_len = len via .min(), so we fall back to byte-by-byte
    // operations for the entire range.
    let align_offset = base.align_offset(align_of::<u128>());
    let head_len = align_offset.min(len);
    while i < head_len {
        unsafe {
            base.add(i).write_volatile(value);
        }
        i += 1;
    }

    // Write aligned u128 chunks
    // SAFETY: After processing head_len bytes, base.add(i) is u128-aligned
    while i + CHUNK <= len {
        unsafe {
            (base.add(i) as *mut u128).write_volatile(value_u128);
        }
        i += CHUNK;
    }

    // Handle remaining tail bytes
    while i < len {
        unsafe {
            base.add(i).write_volatile(value);
        }
        i += 1;
    }

    drop(guard);
    Ok(())
}
1075
1076 /// Pushes the given data onto shared memory to the buffer at the given offset.
1077 /// NOTE! buffer_start_offset must point to the beginning of the buffer
1078 #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
1079 pub fn push_buffer(
1080 &mut self,
1081 buffer_start_offset: usize,
1082 buffer_size: usize,
1083 data: &[u8],
1084 ) -> Result<()> {
1085 let stack_pointer_rel = self.read::<u64>(buffer_start_offset)? as usize;
1086 let buffer_size_u64: u64 = buffer_size.try_into()?;
1087
1088 if stack_pointer_rel > buffer_size || stack_pointer_rel < 8 {
1089 return Err(new_error!(
1090 "Unable to push data to buffer: Stack pointer is out of bounds. Stack pointer: {}, Buffer size: {}",
1091 stack_pointer_rel,
1092 buffer_size_u64
1093 ));
1094 }
1095
1096 let size_required = data.len() + 8;
1097 let size_available = buffer_size - stack_pointer_rel;
1098
1099 if size_required > size_available {
1100 return Err(new_error!(
1101 "Not enough space in buffer to push data. Required: {}, Available: {}",
1102 size_required,
1103 size_available
1104 ));
1105 }
1106
1107 // get absolute
1108 let stack_pointer_abs = stack_pointer_rel + buffer_start_offset;
1109
1110 // write the actual data to the top of stack
1111 self.copy_from_slice(data, stack_pointer_abs)?;
1112
1113 // write the offset to the newly written data, to the top of stack.
1114 // this is used when popping the stack, to know how far back to jump
1115 self.write::<u64>(stack_pointer_abs + data.len(), stack_pointer_rel as u64)?;
1116
1117 // update stack pointer to point to the next free address
1118 self.write::<u64>(
1119 buffer_start_offset,
1120 (stack_pointer_rel + data.len() + 8) as u64,
1121 )?;
1122 Ok(())
1123 }
1124
1125 /// Pop the top element of the ring as raw bytes. Unlike
1126 /// [`Self::try_pop_buffer_into`], this doesn't peek at the element's
1127 /// contents — the element size is recovered from the trailing
1128 /// back-pointer that [`Self::push_buffer`] wrote.
1129 pub fn try_pop_buffer_raw(
1130 &mut self,
1131 buffer_start_offset: usize,
1132 buffer_size: usize,
1133 ) -> Result<Vec<u8>> {
1134 let stack_pointer_rel = self.read::<u64>(buffer_start_offset)? as usize;
1135
1136 if stack_pointer_rel > buffer_size || stack_pointer_rel < 16 {
1137 return Err(new_error!(
1138 "try_pop_buffer_raw: stack pointer {} out of bounds (size {})",
1139 stack_pointer_rel,
1140 buffer_size
1141 ));
1142 }
1143
1144 let back_ptr_abs = stack_pointer_rel + buffer_start_offset - 8;
1145 let element_offset_rel = self.read::<u64>(back_ptr_abs)? as usize;
1146
1147 if element_offset_rel < 8 || element_offset_rel > stack_pointer_rel.saturating_sub(8) {
1148 return Err(new_error!(
1149 "try_pop_buffer_raw: back-pointer {} outside [8, {}]",
1150 element_offset_rel,
1151 stack_pointer_rel.saturating_sub(8)
1152 ));
1153 }
1154
1155 let element_size = stack_pointer_rel - element_offset_rel - 8;
1156 let element_abs = element_offset_rel + buffer_start_offset;
1157 let mut out = vec![0u8; element_size];
1158 self.copy_to_slice(&mut out, element_abs)?;
1159
1160 // Pop: rewind stack pointer.
1161 self.write::<u64>(buffer_start_offset, element_offset_rel as u64)?;
1162 // Zero out the popped slot + its back-pointer.
1163 self.fill(0, element_abs, stack_pointer_rel - element_offset_rel)?;
1164
1165 Ok(out)
1166 }
1167
1168 /// Pops the given given buffer into a `T` and returns it.
1169 /// NOTE! the data must be a size-prefixed flatbuffer, and
1170 /// buffer_start_offset must point to the beginning of the buffer
1171 pub fn try_pop_buffer_into<T>(
1172 &mut self,
1173 buffer_start_offset: usize,
1174 buffer_size: usize,
1175 ) -> Result<T>
1176 where
1177 T: for<'b> TryFrom<&'b [u8]>,
1178 {
1179 // get the stackpointer
1180 let stack_pointer_rel = self.read::<u64>(buffer_start_offset)? as usize;
1181
1182 if stack_pointer_rel > buffer_size || stack_pointer_rel < 16 {
1183 return Err(new_error!(
1184 "Unable to pop data from buffer: Stack pointer is out of bounds. Stack pointer: {}, Buffer size: {}",
1185 stack_pointer_rel,
1186 buffer_size
1187 ));
1188 }
1189
1190 // make it absolute
1191 let last_element_offset_abs = stack_pointer_rel + buffer_start_offset;
1192
1193 // go back 8 bytes to get offset to element on top of stack
1194 let last_element_offset_rel: usize =
1195 self.read::<u64>(last_element_offset_abs - 8)? as usize;
1196
1197 // Validate element offset (guest-writable): must be in [8, stack_pointer_rel - 16]
1198 // to leave room for the 8-byte back-pointer plus at least 8 bytes of element data
1199 // (the minimum for a size-prefixed flatbuffer: 4-byte prefix + 4-byte root offset).
1200 if last_element_offset_rel > stack_pointer_rel.saturating_sub(16)
1201 || last_element_offset_rel < 8
1202 {
1203 return Err(new_error!(
1204 "Corrupt buffer back-pointer: element offset {} is outside valid range [8, {}].",
1205 last_element_offset_rel,
1206 stack_pointer_rel.saturating_sub(16),
1207 ));
1208 }
1209
1210 // make it absolute
1211 let last_element_offset_abs = last_element_offset_rel + buffer_start_offset;
1212
1213 // Max bytes the element can span (excluding the 8-byte back-pointer).
1214 let max_element_size = stack_pointer_rel - last_element_offset_rel - 8;
1215
1216 // Get the size of the flatbuffer buffer from memory
1217 let fb_buffer_size = {
1218 let raw_prefix = self.read::<u32>(last_element_offset_abs)?;
1219 // flatbuffer byte arrays are prefixed by 4 bytes indicating
1220 // the remaining size; add 4 for the prefix itself.
1221 let total = raw_prefix.checked_add(4).ok_or_else(|| {
1222 new_error!(
1223 "Corrupt buffer size prefix: value {} overflows when adding 4-byte header.",
1224 raw_prefix
1225 )
1226 })?;
1227 usize::try_from(total)
1228 }?;
1229
1230 if fb_buffer_size > max_element_size {
1231 return Err(new_error!(
1232 "Corrupt buffer size prefix: flatbuffer claims {} bytes but the element slot is only {} bytes.",
1233 fb_buffer_size,
1234 max_element_size
1235 ));
1236 }
1237
1238 let mut result_buffer = vec![0; fb_buffer_size];
1239
1240 self.copy_to_slice(&mut result_buffer, last_element_offset_abs)?;
1241 let to_return = T::try_from(result_buffer.as_slice()).map_err(|_e| {
1242 new_error!(
1243 "pop_buffer_into: failed to convert buffer to {}",
1244 type_name::<T>()
1245 )
1246 })?;
1247
1248 // update the stack pointer to point to the element we just popped off since that is now free
1249 self.write::<u64>(buffer_start_offset, last_element_offset_rel as u64)?;
1250
1251 // zero out the memory we just popped off
1252 let num_bytes_to_zero = stack_pointer_rel - last_element_offset_rel;
1253 self.fill(0, last_element_offset_abs, num_bytes_to_zero)?;
1254
1255 Ok(to_return)
1256 }
1257}
1258
1259impl SharedMemory for HostSharedMemory {
1260 fn region(&self) -> &HostMapping {
1261 &self.region
1262 }
1263 fn with_exclusivity<T, F: FnOnce(&mut ExclusiveSharedMemory) -> T>(
1264 &mut self,
1265 f: F,
1266 ) -> Result<T> {
1267 let guard = self
1268 .lock
1269 .try_write()
1270 .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
1271 let mut excl = ExclusiveSharedMemory {
1272 region: self.region.clone(),
1273 };
1274 let ret = f(&mut excl);
1275 drop(excl);
1276 drop(guard);
1277 Ok(ret)
1278 }
1279}
1280
/// A ReadonlySharedMemory is a different kind of shared memory,
/// separate from the exclusive/host/guest lifecycle, used to
/// represent read-only mappings of snapshot pages into the guest
/// efficiently.
///
/// The backing mapping is held through an `Arc`, so clones of a
/// `ReadonlySharedMemory` refer to the same [`HostMapping`].
#[derive(Clone, Debug)]
pub struct ReadonlySharedMemory {
    /// The host mapping backing this memory, shared between clones.
    region: Arc<HostMapping>,
    /// If `Some`, only this many bytes are mapped into guest PA space
    /// by `mapping_at`. If `None`, the full `mem_size()` is mapped.
    #[cfg_attr(unshared_snapshot_mem, allow(dead_code))]
    guest_mapped_size: Option<usize>,
}
// Safety: HostMapping is only non-Send/Sync (causing
// ReadonlySharedMemory to not be automatically Send/Sync) because raw
// pointers are not ("as a lint", as the Rust docs say). We don't want
// to mark HostMapping itself Send/Sync, because that could
// socially imply that it's "safe" to use unsafe accesses from
// multiple threads at once in more cases, including ones that don't
// actually ensure immutability/synchronisation. Since the host can
// only access a ReadonlySharedMemory by reading, and reading
// concurrently from multiple threads is not racy,
// ReadonlySharedMemory can be Send and Sync.
//
// NOTE(review): `ReadonlySharedMemory::mapping_at` registers this
// memory with the WRITE flag set, so the guest may be able to modify
// it while the host holds a `&[u8]` obtained from `as_slice` —
// confirm that the "only ever read" argument above still holds.
unsafe impl Send for ReadonlySharedMemory {}
unsafe impl Sync for ReadonlySharedMemory {}
1305
1306impl ReadonlySharedMemory {
1307 pub(crate) fn from_bytes(contents: &[u8]) -> Result<Self> {
1308 let mut anon = ExclusiveSharedMemory::new(contents.len())?;
1309 anon.copy_from_slice(contents, 0)?;
1310 Ok(ReadonlySharedMemory {
1311 region: anon.region,
1312 guest_mapped_size: None,
1313 })
1314 }
1315
1316 /// The number of bytes that should be mapped into guest PA space.
1317 /// Returns `guest_mapped_size` if set, otherwise `mem_size()`.
1318 #[cfg(not(unshared_snapshot_mem))]
1319 pub(crate) fn guest_mapped_size(&self) -> usize {
1320 self.guest_mapped_size.unwrap_or_else(|| self.mem_size())
1321 }
1322
1323 pub(crate) fn as_slice(&self) -> &[u8] {
1324 unsafe { std::slice::from_raw_parts(self.base_ptr(), self.mem_size()) }
1325 }
1326
/// Allocate a fresh `ExclusiveSharedMemory` of the same usable size
/// and copy the contents of `self` into it.
///
/// # Errors
/// Fails if the allocation or the copy fails.
#[cfg(unshared_snapshot_mem)]
pub(crate) fn copy_to_writable(&self) -> Result<ExclusiveSharedMemory> {
    let usable_len = self.mem_size();
    let mut copy = ExclusiveSharedMemory::new(usable_len)?;
    let contents = self.as_slice();
    copy.copy_from_slice(contents, 0)?;
    Ok(copy)
}
1333
1334 #[cfg(not(unshared_snapshot_mem))]
1335 pub(crate) fn build(self) -> (Self, Self) {
1336 (self.clone(), self)
1337 }
1338
1339 #[cfg(not(unshared_snapshot_mem))]
1340 pub(crate) fn mapping_at(
1341 &self,
1342 guest_base: u64,
1343 region_type: MemoryRegionType,
1344 ) -> MemoryRegion {
1345 #[allow(clippy::panic)]
1346 // This will not ever actually panic: the only place this is
1347 // called is HyperlightVm::update_snapshot_mapping, which
1348 // always calls it with the Snapshot region type.
1349 if region_type != MemoryRegionType::Snapshot {
1350 panic!("ReadonlySharedMemory::mapping_at should only be used for Snapshot regions");
1351 }
1352 // Register snapshot mem RWX at the KVM level. Upstream marked
1353 // this RX-only and relied on guest-PT CoW for write semantics,
1354 // which trapped first writes and resolved them to scratch frames
1355 // — driving a slow leak via prim_alloc. The underlying mmap is
1356 // already PROT_READ|PROT_WRITE; `ReadonlySharedMemory` is a
1357 // host-side Rust-API artifact, not a KVM-level constraint.
1358 mapping_at(
1359 self,
1360 guest_base,
1361 self.guest_mapped_size(),
1362 region_type,
1363 MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE,
1364 )
1365 }
1366}
1367
1368impl SharedMemory for ReadonlySharedMemory {
1369 fn region(&self) -> &HostMapping {
1370 &self.region
1371 }
1372 // There's no way to get exclusive (and therefore writable) access
1373 // to a ReadonlySharedMemory.
1374 fn with_exclusivity<T, F: FnOnce(&mut ExclusiveSharedMemory) -> T>(
1375 &mut self,
1376 _: F,
1377 ) -> Result<T> {
1378 Err(new_error!(
1379 "Cannot take exclusive access to a ReadonlySharedMemory"
1380 ))
1381 }
1382 // However, just access to the contents as a slice is doable
1383 fn with_contents<T, F: FnOnce(&[u8]) -> T>(&mut self, f: F) -> Result<T> {
1384 Ok(f(self.as_slice()))
1385 }
1386}
1387
1388impl<S: SharedMemory> PartialEq<S> for ReadonlySharedMemory {
1389 fn eq(&self, other: &S) -> bool {
1390 self.raw_ptr() == other.raw_ptr()
1391 }
1392}