Skip to content

Commit ab04a1e

Browse files
hoshinolinaslp
authored and committed
mount: Add support for FEX merged rootfs mode
In this mode, the FEX rootfs passed to FEX is already overlaid on top of the real root filesystem, so FEX directs all guest accesses to it instead of doing its own overlay logic. This, together with a bunch of fixes on the FEX side, fixes Wine. Opt-in for now, since this actively *breaks* things with the current FEX. May become the default in the future once all that is sorted out. Signed-off-by: Asahi Lina <[email protected]>
1 parent 729f613 commit ab04a1e

3 files changed

Lines changed: 204 additions & 40 deletions

File tree

crates/muvm/src/bin/muvm.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,15 @@ fn main() -> Result<ExitCode> {
222222
.collect()
223223
};
224224

225+
if options.merged_rootfs {
226+
if disks.is_empty() {
227+
return Err(anyhow!(
228+
"Merged RootFS mode requires one or more RootFS images"
229+
));
230+
}
231+
env.insert("FEX_MERGEDROOTFS".to_owned(), "1".to_owned());
232+
}
233+
225234
for path in disks {
226235
add_ro_disk(ctx_id, &path, &path).context("Failed to configure disk")?;
227236
}

crates/muvm/src/cli_options.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub struct Options {
1616
pub root_server_port: u32,
1717
pub server_port: u32,
1818
pub fex_images: Vec<String>,
19+
pub merged_rootfs: bool,
1920
pub sommelier: bool,
2021
pub interactive: bool,
2122
pub tty: bool,
@@ -90,6 +91,10 @@ pub fn options() -> OptionParser<Options> {
9091
)
9192
.argument::<String>("FEX_IMAGE")
9293
.many();
94+
let merged_rootfs = long("merged-rootfs")
95+
.short('m')
96+
.help("Use merged rootfs for FEX (experimental)")
97+
.switch();
9398
let passt_socket = long("passt-socket")
9499
.help("Instead of starting passt, connect to passt socket at PATH")
95100
.argument("PATH")
@@ -133,6 +138,7 @@ pub fn options() -> OptionParser<Options> {
133138
root_server_port,
134139
server_port,
135140
fex_images,
141+
merged_rootfs,
136142
sommelier,
137143
interactive,
138144
tty,

crates/muvm/src/guest/mount.rs

Lines changed: 189 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
1+
use std::collections::HashSet;
2+
use std::env;
13
use std::ffi::CString;
2-
use std::fs::{read_dir, File};
4+
use std::fs::{read_dir, read_link, File};
35
use std::io::Write;
46
use std::os::fd::AsFd;
5-
use std::path::Path;
7+
use std::path::{Path, PathBuf};
68

79
use anyhow::{Context, Result};
810
use rustix::fs::{mkdir, symlink, Mode, CWD};
911
use rustix::mount::{
10-
mount2, mount_bind, move_mount, open_tree, unmount, MountFlags, MoveMountFlags, OpenTreeFlags,
11-
UnmountFlags,
12+
mount2, mount_bind, mount_recursive_bind, move_mount, open_tree, unmount, MountFlags,
13+
MoveMountFlags, OpenTreeFlags, UnmountFlags,
1214
};
15+
use rustix::path::Arg;
16+
use serde_json::json;
1317

1418
fn make_tmpfs(dir: &str) -> Result<()> {
1519
mount2(
@@ -31,6 +35,21 @@ fn mkdir_fex(dir: &str) {
3135
.unwrap();
3236
}
3337

38+
fn do_mount_recursive_bind(source: &str, target: PathBuf) -> Result<()> {
39+
// Special case, do not recursively mount the FEX stuff itself, but do
40+
// the /run/muvm-host thing.
41+
if source == "/run" {
42+
mount_bind(source, &target)
43+
.context(format!("Failed to mount {:?} on {:?}", &source, &target))?;
44+
let host = target.join("muvm-host");
45+
mount_bind("/", &host).context(format!("Failed to mount / on {:?}", &host))?;
46+
} else {
47+
mount_recursive_bind(source, &target)
48+
.context(format!("Failed to mount {:?} on {:?}", &source, &target))?;
49+
}
50+
Ok(())
51+
}
52+
3453
fn mount_fex_rootfs() -> Result<()> {
3554
let dir = "/run/fex-emu/";
3655
let dir_rootfs = dir.to_string() + "rootfs";
@@ -41,6 +60,19 @@ fn mount_fex_rootfs() -> Result<()> {
4160
let flags = MountFlags::RDONLY;
4261
let mut images = Vec::new();
4362

63+
let merged_rootfs = env::var("FEX_MERGEDROOTFS")
64+
.map(|a| a != "0")
65+
.unwrap_or(false);
66+
67+
// In merged RootFS mode, make /run/fex-emu a tmpfs.
68+
// This ensures that once /run is bind-mounted into the
69+
// rootfs, /run/fex-emu/* isn't itself visible within the
70+
// rootfs, so recursive RootFS lookups don't succeed and
71+
// break things.
72+
if merged_rootfs {
73+
make_tmpfs(dir)?;
74+
}
75+
4476
// Find /dev/vd*
4577
for x in read_dir("/dev").unwrap() {
4678
let file = x.unwrap();
@@ -60,33 +92,142 @@ fn mount_fex_rootfs() -> Result<()> {
6092
images.push(dir);
6193
}
6294

63-
if images.len() >= 2 {
64-
// Overlay the mounts together.
65-
let opts = format!(
66-
"lowerdir={}",
67-
images.into_iter().rev().collect::<Vec<String>>().join(":")
68-
);
69-
let opts = CString::new(opts).unwrap();
70-
let overlay = "overlay".to_string();
71-
let overlay_ = Some(&overlay);
72-
73-
mkdir_fex(&dir_rootfs);
74-
mount2(overlay_, &dir_rootfs, overlay_, flags, Some(&opts)).context("Failed to overlay")?;
75-
} else if images.len() == 1 {
76-
// Just expose the one mount
77-
symlink(&images[0], &dir_rootfs)?;
78-
} else if images.is_empty() {
95+
if images.is_empty() {
7996
// If no images were passed, FEX is either managed by the host os
8097
// or is not installed at all. Avoid clobbering the config in that case.
98+
// merged_rootfs is ignored in this case, and we unset the env var so
99+
// the state of MergedRootFS is strictly managed by the host config.
100+
// TODO: Remove once #134 is merged, move merged_rootfs to config.
101+
// SAFETY: muvm-guest is single-threaded.
102+
unsafe { env::remove_var("FEX_MERGEDROOTFS") };
81103
return Ok(());
82104
}
83105

106+
#[allow(clippy::collapsible_else_if)]
107+
if merged_rootfs {
108+
// For merged rootfs mode, we need to overlay subtrees separately
109+
// onto the real rootfs. First, insert the real rootfs as the
110+
// bottom-most "image".
111+
images.insert(0, "/".to_owned());
112+
113+
let mut merge_dirs = HashSet::new();
114+
let mut non_dirs = HashSet::new();
115+
116+
mkdir_fex(&dir_rootfs);
117+
118+
// List all the merged root entries in each layer
119+
// Go backwards, since the file type of the topmost layer "wins"
120+
for image in images.iter().rev() {
121+
for entry in read_dir(image).unwrap() {
122+
let Ok(entry) = entry else { continue };
123+
let Ok(file_type) = entry.file_type() else {
124+
continue;
125+
};
126+
let source = entry.path();
127+
let file_name = entry.file_name().to_str().unwrap().to_owned();
128+
let target = Path::new(&dir_rootfs).join(&file_name);
129+
130+
if file_type.is_file() {
131+
// File in the root fs, bind mount it from the uppermost layer
132+
if non_dirs.insert(file_name) {
133+
File::create(&target)?;
134+
mount_bind(&source, &target)?;
135+
}
136+
} else if file_type.is_symlink() {
137+
// Symlink in the root fs, create it from the uppermost layer
138+
if non_dirs.insert(file_name) {
139+
let symlink_target = read_link(source)?;
140+
symlink(&symlink_target, &target)?;
141+
}
142+
} else {
143+
// Directory, so we potentially have to overlayfs it
144+
if merge_dirs.insert(file_name) {
145+
mkdir_fex(target.as_str()?);
146+
}
147+
}
148+
}
149+
}
150+
151+
// Now, go through each potential merged dir and figure out which
152+
// layers have it, then mount an overlayfs (or bind if one layer).
153+
for dir in merge_dirs {
154+
let target = Path::new(&dir_rootfs).join(&dir);
155+
let mut layers = Vec::new();
156+
157+
for image in images.iter() {
158+
let source = Path::new(image).join(&dir);
159+
if source.is_dir() {
160+
layers.push(source.as_str().unwrap().to_owned());
161+
}
162+
}
163+
assert!(!layers.is_empty());
164+
if layers.len() == 1 {
165+
do_mount_recursive_bind(&layers[0], target)?;
166+
} else {
167+
if layers[0] == "/etc" {
168+
// Special case: /etc has an overlaid mount for /etc/resolv.conf,
169+
// which will confuse overlayfs. So grab the raw mount.
170+
layers[0] = "/run/muvm-host/etc".to_owned();
171+
}
172+
let opts = format!(
173+
"lowerdir={},metacopy=off,redirect_dir=nofollow,userxattr",
174+
layers.into_iter().rev().collect::<Vec<String>>().join(":")
175+
);
176+
let opts = CString::new(opts).unwrap();
177+
let overlay = "overlay".to_string();
178+
let overlay_ = Some(&overlay);
179+
180+
mount2(overlay_, &target, overlay_, flags, Some(&opts))
181+
.context("Failed to overlay")?;
182+
}
183+
}
184+
185+
// Special case: Put back the /etc/resolv.conf overlay on top
186+
overlay_file(
187+
"/etc/resolv.conf",
188+
&(dir_rootfs.clone() + "/etc/resolv.conf"),
189+
)?;
190+
} else {
191+
if images.len() >= 2 {
192+
// Overlay the mounts together.
193+
let opts = format!(
194+
"lowerdir={}",
195+
images.into_iter().rev().collect::<Vec<String>>().join(":")
196+
);
197+
let opts = CString::new(opts).unwrap();
198+
let overlay = "overlay".to_string();
199+
let overlay_ = Some(&overlay);
200+
201+
mkdir_fex(&dir_rootfs);
202+
mount2(overlay_, &dir_rootfs, overlay_, flags, Some(&opts))
203+
.context("Failed to overlay")?;
204+
} else {
205+
assert!(images.len() == 1);
206+
// Just expose the one mount
207+
symlink(&images[0], &dir_rootfs)?;
208+
}
209+
}
210+
84211
// Now we need to tell FEX about this. One of the FEX share directories has an unmounted rootfs
85212
// and a Config.json telling FEX to use FUSE. Neither should be visible to the guest. Instead,
86213
// we want to replace the folders and tell FEX to use our mounted rootfs
87214
for base in ["/usr/share/fex-emu", "/usr/local/share/fex-emu"] {
88215
if Path::new(base).exists() {
89-
let json = format!("{{\"Config\":{{\"RootFS\":\"{dir_rootfs}\"}}}}\n");
216+
let json = if merged_rootfs {
217+
json!({
218+
"Config": {
219+
"RootFS": dir_rootfs,
220+
"MergedRootFS": "1",
221+
},
222+
})
223+
} else {
224+
json!({
225+
"Config": {
226+
"RootFS": dir_rootfs,
227+
},
228+
})
229+
}
230+
.to_string();
90231
let path = base.to_string() + "/Config.json";
91232
let host_dir = "/run/muvm-host".to_string() + base;
92233

@@ -108,6 +249,24 @@ fn mount_fex_rootfs() -> Result<()> {
108249
Ok(())
109250
}
110251

252+
pub fn overlay_file(src: &str, dest: &str) -> Result<()> {
253+
let fd = open_tree(
254+
CWD,
255+
src,
256+
OpenTreeFlags::OPEN_TREE_CLONE | OpenTreeFlags::OPEN_TREE_CLOEXEC,
257+
)
258+
.with_context(|| format!("Failed to open_tree {src:?}"))?;
259+
260+
move_mount(
261+
fd.as_fd(),
262+
"",
263+
CWD,
264+
dest,
265+
MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
266+
)
267+
.with_context(|| format!("Failed to move_mount {src:?} to {dest:?}"))
268+
}
269+
111270
pub fn place_etc(file: &str, contents: Option<&str>) -> Result<()> {
112271
let tmp = "/tmp/".to_string() + file;
113272
let etc = "/etc/".to_string() + file;
@@ -126,30 +285,12 @@ pub fn place_etc(file: &str, contents: Option<&str>) -> Result<()> {
126285
}
127286
}
128287

129-
let fd = open_tree(
130-
CWD,
131-
&tmp,
132-
OpenTreeFlags::OPEN_TREE_CLONE | OpenTreeFlags::OPEN_TREE_CLOEXEC,
133-
)
134-
.context("Failed to open_tree tmp")?;
135-
136-
move_mount(
137-
fd.as_fd(),
138-
"",
139-
CWD,
140-
etc,
141-
MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
142-
)
143-
.context("Failed to move_mount tmp to etc")
288+
overlay_file(&tmp, &etc)
144289
}
145290

146291
pub fn mount_filesystems() -> Result<()> {
147292
make_tmpfs("/var/run")?;
148293

149-
if mount_fex_rootfs().is_err() {
150-
println!("Failed to mount FEX rootfs, carrying on without.")
151-
}
152-
153294
place_etc("resolv.conf", None)?;
154295

155296
mount2(
@@ -184,5 +325,13 @@ pub fn mount_filesystems() -> Result<()> {
184325
)
185326
.context("Failed to mount `/dev/shm`")?;
186327

328+
// Do this last so it can pick up all the submounts made above.
329+
if let Err(e) = mount_fex_rootfs() {
330+
println!(
331+
"Failed to mount FEX rootfs, carrying on without. Error: {}",
332+
e
333+
);
334+
}
335+
187336
Ok(())
188337
}

0 commit comments

Comments
 (0)