99"""
1010Multiprocessing helpers for EdenFS CLI.
1111
12- Re-derives native library directories from sys.path (which still contains
13- Buck2 link-tree paths even after the bootstrapper cleans LD_LIBRARY_PATH)
14- and sets them as LD_LIBRARY_PATH / DYLD_LIBRARY_PATH in os.environ so that
15- spawned child processes can find native .so/.dylib files like folly.iobuf.
12+ Fixes two problems that prevent multiprocessing 'spawn' children from
13+ working correctly in standalone PAR builds on macOS/Windows:
14+
15+ 1. __main__ re-import: multiprocessing.spawn re-imports the __main__
16+ module in the child process. In a PAR, this triggers the full import
17+ chain (main.py -> config.py -> thrift_clients -> folly.iobuf), which
18+ fails because the child doesn't have the PAR bootstrapper's custom
19+ import hooks and the PAR zip shadows the filesystem unpack directory
20+ for packages like folly. We prevent this by clearing __main__.__spec__
21+ and __file__ inside get_context() so the child skips the re-import
22+ entirely — it only needs the target function (e.g. lstat_process),
23+ not the full main module. This must be done inside get_context()
24+ rather than at module level because enable_lazy_imports defers
25+ module-level side effects.
26+
27+ 2. Native library paths: Re-derives native library directories from
28+ sys.path (which still contains Buck2 link-tree paths even after the
29+ bootstrapper cleans LD_LIBRARY_PATH) and sets them as
30+ LD_LIBRARY_PATH / DYLD_LIBRARY_PATH in os.environ so that spawned
31+ child processes can find native .so/.dylib files like folly.iobuf.
1632"""
1733
1834import multiprocessing
@@ -48,23 +64,61 @@ def _setup_library_paths() -> None:
4864 if not dirs :
4965 return
5066
67+ combined = os .pathsep .join (dirs )
68+
5169 if sys .platform == "win32" :
5270 add_dll_directory = getattr (os , "add_dll_directory" , None )
5371 if add_dll_directory is not None :
5472 for d in dirs :
5573 add_dll_directory (d )
5674 elif sys .platform == "darwin" :
5775 existing = os .environ .get ("DYLD_LIBRARY_PATH" , "" )
58- combined = os .pathsep .join (dirs )
5976 if existing :
60- combined = combined + os .pathsep + existing
61- os .environ ["DYLD_LIBRARY_PATH" ] = combined
77+ os .environ ["DYLD_LIBRARY_PATH" ] = combined + os .pathsep + existing
78+ else :
79+ os .environ ["DYLD_LIBRARY_PATH" ] = combined
6280 else :
6381 existing = os .environ .get ("LD_LIBRARY_PATH" , "" )
64- combined = os .pathsep .join (dirs )
6582 if existing :
66- combined = combined + os .pathsep + existing
67- os .environ ["LD_LIBRARY_PATH" ] = combined
83+ os .environ ["LD_LIBRARY_PATH" ] = combined + os .pathsep + existing
84+ else :
85+ os .environ ["LD_LIBRARY_PATH" ] = combined
86+
87+
88+ def _prevent_main_reimport () -> None :
89+ """
90+ Prevent multiprocessing.spawn from re-importing __main__ in children.
91+
92+ In standalone PAR builds, the child process is a bare Python interpreter
93+ without the PAR bootstrapper's custom import hooks. When the child tries
94+ to re-import __main__ (eden.fs.cli.main), it triggers the full import
95+ chain including native extensions like folly.iobuf. These fail because
96+ the PAR zip claims packages like 'folly' via zipimport, but zipimport
97+ cannot load .so extension modules — so folly.iobuf is never found
98+ despite existing in the PAR's unpack directory.
99+
100+ By clearing __main__.__spec__ and __file__, multiprocessing.spawn's
101+ get_preparation_data() won't include 'init_main_from_name' or
102+ 'init_main_from_path', and the child will skip the __main__ re-import
103+ entirely. The child can still import the target function's module
104+ (e.g. eden.fs.cli.mtab for lstat_process) without issues since those
105+ modules don't trigger the problematic import chain.
106+
107+ We access __main__ via sys.modules to bypass the lazy import proxy
108+ that enable_lazy_imports creates.
109+ """
110+ main_mod = sys .modules .get ("__main__" )
111+ if main_mod is None :
112+ return
113+
114+ if getattr (main_mod , "__spec__" , None ) is not None :
115+ main_mod .__spec__ = None # type: ignore[assignment]
116+
117+ if hasattr (main_mod , "__file__" ):
118+ try :
119+ del main_mod .__file__
120+ except AttributeError :
121+ pass
68122
69123
70124_setup_library_paths ()
@@ -74,13 +128,16 @@ def get_context() -> multiprocessing.context.DefaultContext:
74128 """
75129 Return the platform-default multiprocessing context.
76130
77- With _setup_library_paths() having propagated native library dirs into
78- the environment, spawned children can find native modules without
79- needing to force fork.
131+ Clears __main__.__spec__ and __main__.__file__ on each call to prevent
132+ multiprocessing.spawn from re-importing __main__ in the child process.
133+ This must be done here (not at module level) because enable_lazy_imports
134+ defers module-level side effects, and we need the fix applied before
135+ Process.start() captures __main__.__spec__ and __file__ for the child.
80136
81137 Returns:
82138 The default multiprocessing context for the current platform.
83139 """
140+ _prevent_main_reimport ()
84141 # pyre-ignore[7]: multiprocessing.get_context() is typed as BaseContext
85142 # but actually returns DefaultContext at runtime.
86143 return multiprocessing .get_context ()
0 commit comments