1+ use std:: collections:: HashSet ;
2+ use std:: env;
13use std:: ffi:: CString ;
2- use std:: fs:: { read_dir, File } ;
4+ use std:: fs:: { read_dir, read_link , File } ;
35use std:: io:: Write ;
46use std:: os:: fd:: AsFd ;
5- use std:: path:: Path ;
7+ use std:: path:: { Path , PathBuf } ;
68
79use anyhow:: { Context , Result } ;
810use rustix:: fs:: { mkdir, symlink, Mode , CWD } ;
911use rustix:: mount:: {
10- mount2, mount_bind, move_mount, open_tree, unmount, MountFlags , MoveMountFlags , OpenTreeFlags ,
11- UnmountFlags ,
12+ mount2, mount_bind, mount_recursive_bind , move_mount, open_tree, unmount, MountFlags ,
13+ MoveMountFlags , OpenTreeFlags , UnmountFlags ,
1214} ;
15+ use rustix:: path:: Arg ;
16+ use serde_json:: json;
1317
1418fn make_tmpfs ( dir : & str ) -> Result < ( ) > {
1519 mount2 (
@@ -31,6 +35,21 @@ fn mkdir_fex(dir: &str) {
3135 . unwrap ( ) ;
3236}
3337
38+ fn do_mount_recursive_bind ( source : & str , target : PathBuf ) -> Result < ( ) > {
39+ // Special case, do not recursively mount the FEX stuff itself, but do
40+ // the /run/muvm-host thing.
41+ if source == "/run" {
42+ mount_bind ( source, & target)
43+ . context ( format ! ( "Failed to mount {:?} on {:?}" , & source, & target) ) ?;
44+ let host = target. join ( "muvm-host" ) ;
45+ mount_bind ( "/" , & host) . context ( format ! ( "Failed to mount / on {:?}" , & host) ) ?;
46+ } else {
47+ mount_recursive_bind ( source, & target)
48+ . context ( format ! ( "Failed to mount {:?} on {:?}" , & source, & target) ) ?;
49+ }
50+ Ok ( ( ) )
51+ }
52+
3453fn mount_fex_rootfs ( ) -> Result < ( ) > {
3554 let dir = "/run/fex-emu/" ;
3655 let dir_rootfs = dir. to_string ( ) + "rootfs" ;
@@ -41,6 +60,19 @@ fn mount_fex_rootfs() -> Result<()> {
4160 let flags = MountFlags :: RDONLY ;
4261 let mut images = Vec :: new ( ) ;
4362
63+ let merged_rootfs = env:: var ( "FEX_MERGEDROOTFS" )
64+ . map ( |a| a != "0" )
65+ . unwrap_or ( false ) ;
66+
67+ // In merged RootFS mode, make /run/fex-emu a tmpfs.
68+ // This ensures that once /run is bind-mounted into the
69+ // rootfs, /run/fex-emu/* isn't itself visible within the
70+ // rootfs, so recursive RootFS lookups don't succeed and
71+ // break things.
72+ if merged_rootfs {
73+ make_tmpfs ( dir) ?;
74+ }
75+
4476 // Find /dev/vd*
4577 for x in read_dir ( "/dev" ) . unwrap ( ) {
4678 let file = x. unwrap ( ) ;
@@ -60,33 +92,142 @@ fn mount_fex_rootfs() -> Result<()> {
6092 images. push ( dir) ;
6193 }
6294
63- if images. len ( ) >= 2 {
64- // Overlay the mounts together.
65- let opts = format ! (
66- "lowerdir={}" ,
67- images. into_iter( ) . rev( ) . collect:: <Vec <String >>( ) . join( ":" )
68- ) ;
69- let opts = CString :: new ( opts) . unwrap ( ) ;
70- let overlay = "overlay" . to_string ( ) ;
71- let overlay_ = Some ( & overlay) ;
72-
73- mkdir_fex ( & dir_rootfs) ;
74- mount2 ( overlay_, & dir_rootfs, overlay_, flags, Some ( & opts) ) . context ( "Failed to overlay" ) ?;
75- } else if images. len ( ) == 1 {
76- // Just expose the one mount
77- symlink ( & images[ 0 ] , & dir_rootfs) ?;
78- } else if images. is_empty ( ) {
95+ if images. is_empty ( ) {
7996 // If no images were passed, FEX is either managed by the host os
8097 // or is not installed at all. Avoid clobbering the config in that case.
98+ // merged_rootfs is ignored in this case, and we unset the env var so
99+ // the state of MergedRootFS is strictly managed by the host config.
100+ // TODO: Remove once #134 is merged, move merged_rootfs to config.
101+ // SAFETY: muvm-guest is single-threaded.
102+ unsafe { env:: remove_var ( "FEX_MERGEDROOTFS" ) } ;
81103 return Ok ( ( ) ) ;
82104 }
83105
106+ #[ allow( clippy:: collapsible_else_if) ]
107+ if merged_rootfs {
108+ // For merged rootfs mode, we need to overlay subtrees separately
109+ // onto the real rootfs. First, insert the real rootfs as the
110+ // bottom-most "image".
111+ images. insert ( 0 , "/" . to_owned ( ) ) ;
112+
113+ let mut merge_dirs = HashSet :: new ( ) ;
114+ let mut non_dirs = HashSet :: new ( ) ;
115+
116+ mkdir_fex ( & dir_rootfs) ;
117+
118+ // List all the merged root entries in each layer
119+ // Go backwards, since the file type of the topmost layer "wins"
120+ for image in images. iter ( ) . rev ( ) {
121+ for entry in read_dir ( image) . unwrap ( ) {
122+ let Ok ( entry) = entry else { continue } ;
123+ let Ok ( file_type) = entry. file_type ( ) else {
124+ continue ;
125+ } ;
126+ let source = entry. path ( ) ;
127+ let file_name = entry. file_name ( ) . to_str ( ) . unwrap ( ) . to_owned ( ) ;
128+ let target = Path :: new ( & dir_rootfs) . join ( & file_name) ;
129+
130+ if file_type. is_file ( ) {
131+ // File in the root fs, bind mount it from the uppermost layer
132+ if non_dirs. insert ( file_name) {
133+ File :: create ( & target) ?;
134+ mount_bind ( & source, & target) ?;
135+ }
136+ } else if file_type. is_symlink ( ) {
137+ // Symlink in the root fs, create it from the uppermost layer
138+ if non_dirs. insert ( file_name) {
139+ let symlink_target = read_link ( source) ?;
140+ symlink ( & symlink_target, & target) ?;
141+ }
142+ } else {
143+ // Directory, so we potentially have to overlayfs it
144+ if merge_dirs. insert ( file_name) {
145+ mkdir_fex ( target. as_str ( ) ?) ;
146+ }
147+ }
148+ }
149+ }
150+
151+ // Now, go through each potential merged dir and figure out which
152+ // layers have it, then mount an overlayfs (or bind if one layer).
153+ for dir in merge_dirs {
154+ let target = Path :: new ( & dir_rootfs) . join ( & dir) ;
155+ let mut layers = Vec :: new ( ) ;
156+
157+ for image in images. iter ( ) {
158+ let source = Path :: new ( image) . join ( & dir) ;
159+ if source. is_dir ( ) {
160+ layers. push ( source. as_str ( ) . unwrap ( ) . to_owned ( ) ) ;
161+ }
162+ }
163+ assert ! ( !layers. is_empty( ) ) ;
164+ if layers. len ( ) == 1 {
165+ do_mount_recursive_bind ( & layers[ 0 ] , target) ?;
166+ } else {
167+ if layers[ 0 ] == "/etc" {
168+ // Special case: /etc has an overlaid mount for /etc/resolv.conf,
169+ // which will confuse overlayfs. So grab the raw mount.
170+ layers[ 0 ] = "/run/muvm-host/etc" . to_owned ( ) ;
171+ }
172+ let opts = format ! (
173+ "lowerdir={},metacopy=off,redirect_dir=nofollow,userxattr" ,
174+ layers. into_iter( ) . rev( ) . collect:: <Vec <String >>( ) . join( ":" )
175+ ) ;
176+ let opts = CString :: new ( opts) . unwrap ( ) ;
177+ let overlay = "overlay" . to_string ( ) ;
178+ let overlay_ = Some ( & overlay) ;
179+
180+ mount2 ( overlay_, & target, overlay_, flags, Some ( & opts) )
181+ . context ( "Failed to overlay" ) ?;
182+ }
183+ }
184+
185+ // Special case: Put back the /etc/resolv.conf overlay on top
186+ overlay_file (
187+ "/etc/resolv.conf" ,
188+ & ( dir_rootfs. clone ( ) + "/etc/resolv.conf" ) ,
189+ ) ?;
190+ } else {
191+ if images. len ( ) >= 2 {
192+ // Overlay the mounts together.
193+ let opts = format ! (
194+ "lowerdir={}" ,
195+ images. into_iter( ) . rev( ) . collect:: <Vec <String >>( ) . join( ":" )
196+ ) ;
197+ let opts = CString :: new ( opts) . unwrap ( ) ;
198+ let overlay = "overlay" . to_string ( ) ;
199+ let overlay_ = Some ( & overlay) ;
200+
201+ mkdir_fex ( & dir_rootfs) ;
202+ mount2 ( overlay_, & dir_rootfs, overlay_, flags, Some ( & opts) )
203+ . context ( "Failed to overlay" ) ?;
204+ } else {
205+ assert ! ( images. len( ) == 1 ) ;
206+ // Just expose the one mount
207+ symlink ( & images[ 0 ] , & dir_rootfs) ?;
208+ }
209+ }
210+
84211 // Now we need to tell FEX about this. One of the FEX share directories has an unmounted rootfs
85212 // and a Config.json telling FEX to use FUSE. Neither should be visible to the guest. Instead,
86213 // we want to replace the folders and tell FEX to use our mounted rootfs
87214 for base in [ "/usr/share/fex-emu" , "/usr/local/share/fex-emu" ] {
88215 if Path :: new ( base) . exists ( ) {
89- let json = format ! ( "{{\" Config\" :{{\" RootFS\" :\" {dir_rootfs}\" }}}}\n " ) ;
216+ let json = if merged_rootfs {
217+ json ! ( {
218+ "Config" : {
219+ "RootFS" : dir_rootfs,
220+ "MergedRootFS" : "1" ,
221+ } ,
222+ } )
223+ } else {
224+ json ! ( {
225+ "Config" : {
226+ "RootFS" : dir_rootfs,
227+ } ,
228+ } )
229+ }
230+ . to_string ( ) ;
90231 let path = base. to_string ( ) + "/Config.json" ;
91232 let host_dir = "/run/muvm-host" . to_string ( ) + base;
92233
@@ -108,6 +249,24 @@ fn mount_fex_rootfs() -> Result<()> {
108249 Ok ( ( ) )
109250}
110251
252+ pub fn overlay_file ( src : & str , dest : & str ) -> Result < ( ) > {
253+ let fd = open_tree (
254+ CWD ,
255+ src,
256+ OpenTreeFlags :: OPEN_TREE_CLONE | OpenTreeFlags :: OPEN_TREE_CLOEXEC ,
257+ )
258+ . with_context ( || format ! ( "Failed to open_tree {src:?}" ) ) ?;
259+
260+ move_mount (
261+ fd. as_fd ( ) ,
262+ "" ,
263+ CWD ,
264+ dest,
265+ MoveMountFlags :: MOVE_MOUNT_F_EMPTY_PATH ,
266+ )
267+ . with_context ( || format ! ( "Failed to move_mount {src:?} to {dest:?}" ) )
268+ }
269+
111270pub fn place_etc ( file : & str , contents : Option < & str > ) -> Result < ( ) > {
112271 let tmp = "/tmp/" . to_string ( ) + file;
113272 let etc = "/etc/" . to_string ( ) + file;
@@ -126,30 +285,12 @@ pub fn place_etc(file: &str, contents: Option<&str>) -> Result<()> {
126285 }
127286 }
128287
129- let fd = open_tree (
130- CWD ,
131- & tmp,
132- OpenTreeFlags :: OPEN_TREE_CLONE | OpenTreeFlags :: OPEN_TREE_CLOEXEC ,
133- )
134- . context ( "Failed to open_tree tmp" ) ?;
135-
136- move_mount (
137- fd. as_fd ( ) ,
138- "" ,
139- CWD ,
140- etc,
141- MoveMountFlags :: MOVE_MOUNT_F_EMPTY_PATH ,
142- )
143- . context ( "Failed to move_mount tmp to etc" )
288+ overlay_file ( & tmp, & etc)
144289}
145290
146291pub fn mount_filesystems ( ) -> Result < ( ) > {
147292 make_tmpfs ( "/var/run" ) ?;
148293
149- if mount_fex_rootfs ( ) . is_err ( ) {
150- println ! ( "Failed to mount FEX rootfs, carrying on without." )
151- }
152-
153294 place_etc ( "resolv.conf" , None ) ?;
154295
155296 mount2 (
@@ -184,5 +325,13 @@ pub fn mount_filesystems() -> Result<()> {
184325 )
185326 . context ( "Failed to mount `/dev/shm`" ) ?;
186327
328+ // Do this last so it can pick up all the submounts made above.
329+ if let Err ( e) = mount_fex_rootfs ( ) {
330+ println ! (
331+ "Failed to mount FEX rootfs, carrying on without. Error: {}" ,
332+ e
333+ ) ;
334+ }
335+
187336 Ok ( ( ) )
188337}
0 commit comments