Skip to content

Commit b706c35

Browse files
committed
Implement a host memory monitor
This monitor checks the ratio of memory available on the host and, if needed, contacts the VM (through muvm-server) to adjust /proc/sys/vm/watermark_scale_factor and, if the situation is critical, to request it to drop its caches. Signed-off-by: Sergio Lopez <[email protected]>
1 parent 720da8f commit b706c35

7 files changed

Lines changed: 162 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/muvm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ env_logger = { version = "0.11.3", default-features = false, features = ["auto-c
1616
krun-sys = { path = "../krun-sys", version = "1.9.1", default-features = false, features = [] }
1717
log = { version = "0.4.21", default-features = false, features = ["kv"] }
1818
nix = { version = "0.28.0", default-features = false, features = ["user"] }
19+
procfs = { version = "0.17.0", default-features = false, features = [] }
1920
rustix = { version = "0.38.34", default-features = false, features = ["fs", "mount", "process", "std", "stdio", "system", "use-libc-auxv"] }
2021
serde = { version = "1.0.203", default-features = false, features = ["derive"] }
2122
serde_json = { version = "1.0.117", default-features = false, features = ["std"] }

crates/muvm/src/bin/muvm.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use muvm::cli_options::options;
1616
use muvm::cpu::{get_fallback_cores, get_performance_cores};
1717
use muvm::env::{find_muvm_exec, prepare_env_vars};
1818
use muvm::launch::{launch_or_lock, LaunchResult};
19+
use muvm::monitor::spawn_monitor;
1920
use muvm::net::{connect_to_passt, start_passt};
2021
use muvm::types::MiB;
2122
use nix::sys::sysinfo::sysinfo;
@@ -435,6 +436,8 @@ fn main() -> Result<()> {
435436
}
436437
}
437438

439+
spawn_monitor(options.root_server_port, cookie);
440+
438441
{
439442
// Start and enter the microVM. Unless there is some error while creating the
440443
// microVM this function never returns.

crates/muvm/src/launch.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ fn lock_file() -> Result<(Option<File>, Uuid)> {
148148
Ok((Some(lock_file), cookie))
149149
}
150150

151-
fn request_launch(
151+
pub fn request_launch(
152152
server_port: u32,
153153
cookie: Uuid,
154154
command: PathBuf,

crates/muvm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pub mod cli_options;
22
pub mod cpu;
33
pub mod env;
44
pub mod launch;
5+
pub mod monitor;
56
pub mod net;
67
pub mod types;
78

crates/muvm/src/monitor.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use std::collections::HashMap;
2+
use std::path::PathBuf;
3+
use std::thread;
4+
use std::time;
5+
6+
use anyhow::Result;
7+
use log::debug;
8+
use procfs::{Current, Meminfo};
9+
use uuid::Uuid;
10+
11+
use crate::launch::request_launch;
12+
13+
#[derive(Clone, Debug, PartialEq)]
14+
pub enum GuestPressure {
15+
None,
16+
Low,
17+
Medium,
18+
High,
19+
Critical,
20+
}
21+
22+
impl From<GuestPressure> for u32 {
23+
fn from(pressure: GuestPressure) -> u32 {
24+
match pressure {
25+
GuestPressure::None => 10,
26+
GuestPressure::Low => 1000,
27+
GuestPressure::Medium => 2000,
28+
GuestPressure::High => 3000,
29+
// Same waterlevel as High, but also explicitly requesting
30+
// the guest to drop its page cache.
31+
GuestPressure::Critical => 3000,
32+
}
33+
}
34+
}
35+
36+
pub fn spawn_monitor(server_port: u32, cookie: Uuid) {
37+
thread::spawn(move || run(server_port, cookie));
38+
}
39+
40+
fn set_guest_pressure(server_port: u32, cookie: Uuid, pressure: GuestPressure) -> Result<()> {
41+
if pressure == GuestPressure::Critical {
42+
debug!("requesting the guest to drop its caches");
43+
// This is a fake command that tells muvm-server to write to "/proc/sys/vm/drop_caches"
44+
let command = PathBuf::from("/muvmdropcaches");
45+
let command_args = vec![];
46+
let env = HashMap::new();
47+
request_launch(server_port, cookie, command, command_args, env)?;
48+
}
49+
50+
let wsf: u32 = pressure.into();
51+
debug!("setting watermark_scale_factor to {wsf}");
52+
53+
let command = PathBuf::from("/sbin/sysctl");
54+
let command_args = vec![format!("vm.watermark_scale_factor={}", wsf)];
55+
let env = HashMap::new();
56+
request_launch(server_port, cookie, command, command_args, env)
57+
}
58+
59+
fn run(server_port: u32, cookie: Uuid) {
60+
let mut guest_pressure = GuestPressure::None;
61+
loop {
62+
let meminfo = Meminfo::current().ok();
63+
if let Some(meminfo) = meminfo {
64+
if let Some(available) = meminfo.mem_available {
65+
let avail_ratio = (available * 100) / meminfo.mem_total;
66+
debug!(
67+
"avail_ratio={avail_ratio}, avail={available}, total={}",
68+
meminfo.mem_total
69+
);
70+
let new_pressure = if avail_ratio <= 10 {
71+
GuestPressure::Critical
72+
} else if avail_ratio <= 15 {
73+
GuestPressure::High
74+
} else if avail_ratio <= 20 {
75+
GuestPressure::Medium
76+
} else if avail_ratio <= 25 {
77+
GuestPressure::Low
78+
} else {
79+
GuestPressure::None
80+
};
81+
82+
debug!("Pressure at {:?}", new_pressure);
83+
84+
if new_pressure != guest_pressure {
85+
if let Err(err) = set_guest_pressure(server_port, cookie, new_pressure.clone())
86+
{
87+
println!("Failed to set the new pressure in the guest: {err}");
88+
} else {
89+
guest_pressure = new_pressure;
90+
}
91+
}
92+
}
93+
}
94+
thread::sleep(time::Duration::from_millis(500));
95+
}
96+
}

crates/muvm/src/server/worker.rs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::collections::HashMap;
2+
use std::fs::File;
3+
use std::io::Write;
24
use std::os::unix::process::ExitStatusExt as _;
3-
use std::path::PathBuf;
5+
use std::path::{Path, PathBuf};
46
use std::process::{ExitStatus, Stdio};
57
use std::{env, io};
68

@@ -18,6 +20,11 @@ use uuid::Uuid;
1820
use crate::utils::launch::Launch;
1921
use crate::utils::stdio::make_stdout_stderr;
2022

23+
pub enum ConnRequest {
24+
DropCaches,
25+
ExecuteCommand { command: PathBuf, child: Child },
26+
}
27+
2128
#[derive(Debug)]
2229
pub struct Worker {
2330
cookie: Uuid,
@@ -60,9 +67,12 @@ impl Worker {
6067
let stream = BufStream::new(stream);
6168

6269
match handle_connection(self.cookie, stream).await {
63-
Ok((command, mut child)) => {
64-
self.child_set.spawn(async move { (command, child.wait().await) });
65-
self.set_child_processes(self.child_set.len());
70+
Ok(request) => match request {
71+
ConnRequest::DropCaches => {},
72+
ConnRequest::ExecuteCommand {command, mut child } => {
73+
self.child_set.spawn(async move { (command, child.wait().await) });
74+
self.set_child_processes(self.child_set.len());
75+
}
6676
},
6777
Err(err) => {
6878
eprintln!("Failed to process client request: {err:?}");
@@ -164,7 +174,7 @@ async fn read_request(stream: &mut BufStream<TcpStream>) -> Result<Launch> {
164174
async fn handle_connection(
165175
server_cookie: Uuid,
166176
mut stream: BufStream<TcpStream>,
167-
) -> Result<(PathBuf, Child)> {
177+
) -> Result<ConnRequest> {
168178
let mut envs: HashMap<String, String> = env::vars().collect();
169179

170180
let Launch {
@@ -182,6 +192,21 @@ async fn handle_connection(
182192
return Err(anyhow!(msg));
183193
}
184194

195+
if command == Path::new("/muvmdropcaches") {
196+
let mut file = File::options()
197+
.write(true)
198+
.open("/proc/sys/vm/drop_caches")
199+
.context("Failed to open /proc/sys/vm/drop_caches for writing")?;
200+
201+
{
202+
file.write_all(b"1")
203+
.context("Failed to write to /proc/sys/vm/drop_caches")?;
204+
}
205+
stream.write_all(b"OK").await.ok();
206+
stream.flush().await.ok();
207+
return Ok(ConnRequest::DropCaches);
208+
}
209+
185210
envs.extend(env);
186211

187212
let (stdout, stderr) = make_stdout_stderr(&command, &envs)?;
@@ -202,5 +227,5 @@ async fn handle_connection(
202227
}
203228
stream.flush().await.ok();
204229

205-
res.map(|child| (command, child))
230+
res.map(|child| ConnRequest::ExecuteCommand { command, child })
206231
}

0 commit comments

Comments
 (0)