Skip to main content

nub_host_kvm/hypervisor/
mod.rs

1/*
2Copyright 2025  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17/// Abstracts over different hypervisor register representations
18pub(crate) mod regs;
19
20pub(crate) mod virtual_machine;
21
22pub(crate) mod hyperlight_vm;
23
24use std::fmt::Debug;
25#[cfg(kvm)]
26use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
27#[cfg(kvm)]
28use std::time::Duration;
29
/// A trait for platform-specific interrupt handle implementation details.
///
/// This is the crate-internal companion of the public [`InterruptHandle`] trait: it exposes the
/// state-management operations (vcpu thread ID, running / cancel / debug-interrupt flags and the
/// dropped marker) that an interrupt handle implementation needs in addition to the public API.
pub(crate) trait InterruptHandleImpl: InterruptHandle {
    /// Set the thread ID for the vcpu thread.
    ///
    /// Only available on KVM builds. The KVM implementation in this module records the *calling*
    /// thread, so this is presumably meant to be invoked on the thread that will run the vcpu
    /// (TODO confirm against the caller).
    #[cfg(kvm)]
    fn set_tid(&self);

    /// Set the running state, marking the vcpu as actively running.
    fn set_running(&self);

    /// Clear the running state, marking the vcpu as no longer running.
    fn clear_running(&self);

    /// Mark the handle as dropped, so that [`InterruptHandle::dropped`] reports `true`.
    fn set_dropped(&self);

    /// Check if cancellation was requested.
    fn is_cancelled(&self) -> bool;

    /// Clear the cancellation request flag.
    fn clear_cancel(&self);

    /// Check if debug interrupt was requested (always returns false when gdb feature is disabled).
    fn is_debug_interrupted(&self) -> bool;

    /// Clear the debug interrupt request flag.
    ///
    /// Only available when the gdb feature is enabled.
    #[cfg(gdb)]
    fn clear_debug_interrupt(&self);
}
58
/// A trait for handling interrupts to a sandbox's vcpu.
///
/// Implementations must be `Send + Sync` so that a handle can be used from a thread other than
/// the one running the vcpu.
pub trait InterruptHandle: Send + Sync + Debug {
    /// Interrupt the corresponding sandbox from running.
    ///
    /// - If this is called while the sandbox is currently executing a guest function call, it
    ///   will interrupt the sandbox and return `true`.
    /// - If this is called while the sandbox is not running (for example before or after calling
    ///   a guest function), it will do nothing and return `false`.
    ///
    /// # Note
    /// This function will block for the duration of the time it takes for the vcpu thread to be
    /// interrupted.
    fn kill(&self) -> bool;

    /// Used by a debugger to interrupt the corresponding sandbox from running.
    ///
    /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return
    ///   `true`.
    /// - If this is called while the vcpu is not running (for example during a host call), the
    ///   vcpu will not immediately be interrupted, but will be prevented from running
    ///   **the next time** it's scheduled, and this returns `false`.
    ///
    /// # Note
    /// This function will block for the duration of the time it takes for the vcpu thread to be
    /// interrupted.
    #[cfg(gdb)]
    fn kill_from_debugger(&self) -> bool;

    /// Returns true if the corresponding sandbox has been dropped.
    fn dropped(&self) -> bool;
}
85
/// Interrupt handle for the KVM backend on Linux.
///
/// The vcpu is interrupted by sending a real-time signal (`SIGRTMIN + sig_rt_min_offset`) with
/// `pthread_kill` to the thread recorded in `tid`, repeatedly, for as long as the vcpu is marked
/// as running and a cancel or debug interrupt is pending (see `send_signal`).
#[cfg(kvm)]
#[derive(Debug)]
pub(super) struct LinuxInterruptHandle {
    /// Atomic value packing vcpu execution state.
    ///
    /// Bit layout:
    /// - Bit 2: DEBUG_INTERRUPT_BIT - set when debugger interrupt is requested
    /// - Bit 1: RUNNING_BIT - set when vcpu is actively running
    /// - Bit 0: CANCEL_BIT - set when cancellation has been requested
    ///
    /// No other bits are defined by the inherent impl in this module.
    ///
    /// CANCEL_BIT persists across vcpu exits/re-entries within a single `VirtualCPU::run()` call
    /// (e.g., during host function calls), but is cleared at the start of each new `VirtualCPU::run()` call.
    state: AtomicU8,

    /// Thread ID where the vcpu is running.
    ///
    /// Holds the value returned by `pthread_self()` on the vcpu thread, stored as a `u64`
    /// (written by `set_tid()`, read by `send_signal()`).
    ///
    /// Note: Multiple VMs may have the same `tid` (same thread runs multiple sandboxes sequentially),
    /// but at most one VM will have RUNNING_BIT set at any given time.
    tid: AtomicU64,

    /// Whether the corresponding VM has been dropped.
    ///
    /// Written by `set_dropped()` and read by `dropped()`.
    dropped: AtomicBool,

    /// Delay between retry attempts when sending signals to interrupt the vcpu.
    ///
    /// `send_signal()` sleeps for this long after every signal before re-checking the state.
    retry_delay: Duration,

    /// Offset from SIGRTMIN for the signal used to interrupt the vcpu thread.
    sig_rt_min_offset: u8,
}
115
#[cfg(kvm)]
impl LinuxInterruptHandle {
    /// Bit 1 of `state`: set while the vcpu is actively running.
    const RUNNING_BIT: u8 = 1 << 1;
    /// Bit 0 of `state`: set when cancellation has been requested.
    const CANCEL_BIT: u8 = 1 << 0;
    /// Bit 2 of `state`: set when a debugger interrupt has been requested.
    #[cfg(gdb)]
    const DEBUG_INTERRUPT_BIT: u8 = 1 << 2;

    /// Get the running, cancel and debug flags atomically.
    ///
    /// Returns `(running, cancel, debug)` decoded from a single load of `state`, so the three
    /// flags are always mutually consistent. `debug` is always `false` when the gdb feature is
    /// disabled.
    ///
    /// # Memory Ordering
    /// Uses `Acquire` ordering to synchronize with the `Release` in `set_running()` and `kill()`.
    /// This ensures that when we observe running=true, we also see the correct `tid` value.
    fn get_running_cancel_debug(&self) -> (bool, bool, bool) {
        let state = self.state.load(Ordering::Acquire);
        let running = state & Self::RUNNING_BIT != 0;
        let cancel = state & Self::CANCEL_BIT != 0;
        #[cfg(gdb)]
        let debug = state & Self::DEBUG_INTERRUPT_BIT != 0;
        #[cfg(not(gdb))]
        let debug = false;
        (running, cancel, debug)
    }

    /// Repeatedly signal the vcpu thread until it is no longer running or no interrupt is pending.
    ///
    /// On every iteration the state is re-read; the loop ends as soon as RUNNING_BIT is clear or
    /// neither CANCEL_BIT nor the debug interrupt bit is set. Otherwise the signal
    /// `SIGRTMIN + sig_rt_min_offset` is sent to the thread stored in `tid` and the calling
    /// thread sleeps for `retry_delay` before checking again.
    ///
    /// Returns `true` if at least one signal send was attempted, `false` if the vcpu was not
    /// running (or nothing was pending) on the first check.
    fn send_signal(&self) -> bool {
        let signal_number = libc::SIGRTMIN() + libc::c_int::from(self.sig_rt_min_offset);
        let mut sent_signal = false;

        loop {
            let (running, cancel, debug) = self.get_running_cancel_debug();

            // Check if we should continue sending signals
            // Exit if not running OR if neither cancel nor debug_interrupt is set
            let should_continue = running && (cancel || debug);

            if !should_continue {
                break;
            }

            tracing::info!("Sending signal to kill vcpu thread...");
            sent_signal = true;

            // Acquire ordering to synchronize with the Release store in set_tid()
            // This ensures we see the correct tid value for the currently running vcpu
            let tid = self.tid.load(Ordering::Acquire) as libc::pthread_t;

            // SAFETY: `tid` was produced by `pthread_self()` in `set_tid()`, and RUNNING_BIT was
            // observed set just above, so it should name the live thread currently running the
            // vcpu. This assumes `set_tid()` is always called on the vcpu thread before
            // `set_running()` — TODO confirm against the caller.
            let rc = unsafe { libc::pthread_kill(tid, signal_number) };
            if rc != 0 {
                // pthread_kill reports failure through its return value (an errno code), not
                // through `errno`. Surface it instead of silently dropping it; keep retrying so
                // the blocking contract of `kill()` is unchanged.
                tracing::warn!(
                    "Failed to send signal {} to vcpu thread: {}",
                    signal_number,
                    std::io::Error::from_raw_os_error(rc)
                );
            }
            std::thread::sleep(self.retry_delay);
        }

        sent_signal
    }
}
167
#[cfg(kvm)]
impl InterruptHandleImpl for LinuxInterruptHandle {
    /// Record the calling thread as the thread that runs the vcpu.
    fn set_tid(&self) {
        // SAFETY: `pthread_self` has no preconditions and always returns the caller's thread ID.
        let current_thread = unsafe { libc::pthread_self() } as u64;

        // Published with Release so that a thread which later observes RUNNING_BIT through an
        // Acquire load in send_signal() is guaranteed to read this tid and not a stale one.
        self.tid.store(current_thread, Ordering::Release);
    }

    /// Raise RUNNING_BIT in `state`.
    fn set_running(&self) {
        // Release pairs with the Acquire load of `state` on the interrupting thread: whoever
        // sees running=true also sees the tid written (with Release) by set_tid(), so it can
        // never signal a stale thread ID.
        self.state.fetch_or(Self::RUNNING_BIT, Ordering::Release);
    }

    /// Lower RUNNING_BIT in `state`, leaving the other bits untouched.
    fn clear_running(&self) {
        // Release makes every vcpu operation performed so far visible before the bit drops.
        let all_but_running = !Self::RUNNING_BIT;
        self.state.fetch_and(all_but_running, Ordering::Release);
    }

    /// Flag the corresponding VM as dropped.
    fn set_dropped(&self) {
        // Release so that VM cleanup done before this point is visible to any thread that
        // reads the flag through dropped(), which loads with Acquire.
        self.dropped.store(true, Ordering::Release);
    }

    /// Report whether CANCEL_BIT is currently set.
    fn is_cancelled(&self) -> bool {
        // Acquire pairs with the Release in kill(), so a cancel request raised by the
        // interrupting thread is observed here.
        let snapshot = self.state.load(Ordering::Acquire);
        (snapshot & Self::CANCEL_BIT) != 0
    }

    /// Lower CANCEL_BIT in `state`, leaving the other bits untouched.
    fn clear_cancel(&self) {
        // Release publishes whatever the previous run() did to other threads. Although this is
        // normally called by the vcpu thread at the start of run(), the VM itself may migrate
        // between threads from one guest call to the next.
        let all_but_cancel = !Self::CANCEL_BIT;
        self.state.fetch_and(all_but_cancel, Ordering::Release);
    }

    /// Report whether a debugger interrupt is pending; always `false` without the gdb feature.
    fn is_debug_interrupted(&self) -> bool {
        #[cfg(gdb)]
        let debug_requested = (self.state.load(Ordering::Acquire) & Self::DEBUG_INTERRUPT_BIT) != 0;
        #[cfg(not(gdb))]
        let debug_requested = false;
        debug_requested
    }

    /// Lower the debug interrupt bit in `state`, leaving the other bits untouched.
    #[cfg(gdb)]
    fn clear_debug_interrupt(&self) {
        let all_but_debug = !Self::DEBUG_INTERRUPT_BIT;
        self.state.fetch_and(all_but_debug, Ordering::Release);
    }
}
226
#[cfg(kvm)]
impl InterruptHandle for LinuxInterruptHandle {
    /// Request cancellation, then signal the vcpu thread for as long as it keeps running.
    fn kill(&self) -> bool {
        // Raise the cancel request first. Release makes everything written before kill()
        // visible to the vcpu thread once it observes the flag via is_cancelled() (Acquire).
        let _state_before = self.state.fetch_or(Self::CANCEL_BIT, Ordering::Release);

        // Only signals if the vcpu is currently running; the result tells the caller
        // whether any signal was sent.
        self.send_signal()
    }

    /// Request a debugger interrupt, then signal the vcpu thread for as long as it keeps running.
    #[cfg(gdb)]
    fn kill_from_debugger(&self) -> bool {
        let _state_before = self
            .state
            .fetch_or(Self::DEBUG_INTERRUPT_BIT, Ordering::Release);

        self.send_signal()
    }

    /// Report whether the corresponding VM has been marked as dropped.
    fn dropped(&self) -> bool {
        // Acquire pairs with the Release store in set_dropped(), so all VM cleanup that
        // happened before the drop is visible once `true` is observed here.
        self.dropped.load(Ordering::Acquire)
    }
}