Skip to content

Commit 29f11e0

Browse files
authored
Fix #82: Implement environment-specific concurrency lock (PID file) (#89)
* Fix #82: Implement environment-specific concurrency lock (PID file) Signed-off-by: sushant-suse <[email protected]> * Fix #82: updated as per Toms suggestions Signed-off-by: sushant-suse <[email protected]> * fix #82: making the primary lock check instantaneous Signed-off-by: sushant-suse <[email protected]> * Fix #82: updated as per Toms suggestions for Lock Signed-off-by: sushant-suse <[email protected]> * Fix #82: implement robust environment concurrency lock (PID file) Signed-off-by: sushant-suse <[email protected]> --------- Signed-off-by: sushant-suse <[email protected]>
1 parent 3e3bb1c commit 29f11e0

5 files changed

Lines changed: 374 additions & 7 deletions

File tree

changelog.d/89.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implemented concurrency control to prevent multiple `docbuild` instances from running simultaneously using the same environment configuration file.

src/docbuild/cli/cmd_cli.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
PROJECT_LEVEL_APP_CONFIG_FILENAMES,
2222
)
2323
from ..logging import setup_logging
24+
from ..utils.pidlock import PidFileLock
2425
from .cmd_build import build
2526
from .cmd_c14n import c14n
2627
from .cmd_config import config
@@ -33,7 +34,7 @@
3334
PYTHON_VERSION = (
3435
f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'
3536
)
36-
# Global instance of Rich Console
37+
log = logging.getLogger(__name__) # Ensure logger is available globally
3738
CONSOLE = rich.console.Console(stderr=True, highlight=False)
3839

3940
def _setup_console() -> None:
@@ -152,10 +153,35 @@ def cli(
152153
DEFAULT_ENV_CONFIG_FILENAME,
153154
DEFAULT_ENV_CONFIG,
154155
)
156+
# The environment config file path is the resource ID for the lock.
157+
# The path is in context.envconfigfiles, which is a tuple of paths.
158+
159+
env_config_path = context.envconfigfiles[0] if context.envconfigfiles else None
160+
161+
# --- CONCURRENCY CONTROL ---
162+
if env_config_path:
163+
# Wrap the core execution in the PidFileLock context manager
164+
# If the lock cannot be acquired, a RuntimeError is raised, which exits the program.
165+
try:
166+
# Acquire the lock using the environment config file path as the resource ID
167+
ctx.obj.env_lock = PidFileLock(resource_path=env_config_path)
168+
ctx.obj.env_lock.acquire()
169+
log.info("Acquired lock for environment config: %r", env_config_path.name)
170+
except RuntimeError as e:
171+
# If the lock is already held, log the error and exit gracefully.
172+
log.error(str(e))
173+
ctx.exit(1)
174+
except Exception as e:
175+
# Catch all other potential issues during lock acquisition/setup
176+
log.error("Failed to set up environment lock: %s", e)
177+
ctx.exit(1)
178+
179+
# Final config processing must happen outside the lock acquisition check
155180
context.envconfig = replace_placeholders(
156181
context.envconfig,
157182
)
158-
183+
184+
# The acquired lock will be automatically released when the program exits via the atexit.register call in PidFileLock.acquire().
159185

160186
# Add subcommand
161187
cli.add_command(build)

src/docbuild/constants.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,21 @@
138138
"""The default filename for the environment's config file, typically
139139
used in production."""
140140

141-
# --- Logging constants ---
141+
# --- State and Logging Constants (Refactored) ---
142+
143+
BASE_STATE_DIR = Path.home() / '.local' / 'state' / APP_NAME
144+
"""The directory where application state, logs, and locks are stored,
145+
per XDG Base Directory Specification."""
146+
142147
GITLOGGER_NAME = "docbuild.git"
143148
"""The standardized name for the Git-related logger."""
144149

145-
BASE_LOG_DIR = Path.home() / '.local' / 'state' / APP_NAME / 'logs'
146-
"""The directory where log files will be stored, typically at
147-
:file:`~/.local/state/docbuild/logs` as recommended by the XDG Base
148-
Directory Specification."""
150+
BASE_LOG_DIR = BASE_STATE_DIR / 'logs'
151+
"""The directory where log files will be stored."""
152+
153+
# --- Locking constants ---
154+
BASE_LOCK_DIR = BASE_STATE_DIR / 'locks'
155+
"""The directory where PID lock files will be stored."""
149156

150157
XMLDATADIR = Path(__file__).parent / 'config' / 'xml' / 'data'
151158
"""Directory where additional files (RNC, XSLT) for XML processing are stored."""

src/docbuild/utils/pidlock.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""PID file locking utility."""
2+
3+
import atexit
4+
import hashlib
5+
import logging
6+
import os
7+
import errno
8+
import os
9+
from pathlib import Path
10+
from typing import Self, Any
11+
12+
from ..constants import BASE_LOCK_DIR
13+
14+
log = logging.getLogger(__name__)
15+
16+
17+
class PidFileLock:
18+
"""Manages a PID lock file to ensure only one instance of an environment runs.
19+
20+
The lock file is named based on a hash of the environment config file path.
21+
"""
22+
23+
def __init__(self, resource_path: Path, lock_dir: Path = BASE_LOCK_DIR) -> None:
24+
"""Initialize the lock manager.
25+
26+
:param resource_path: The unique path identifying the resource to lock (e.g., env config file).
27+
:param lock_dir: The base directory for lock files.
28+
"""
29+
self.resource_path = resource_path.resolve()
30+
31+
# Converted public attributes to private attributes
32+
self._lock_dir: Path = lock_dir
33+
self._lock_path: Path = self._generate_lock_name()
34+
35+
self._lock_acquired: bool = False
36+
37+
@property
38+
def lock_dir(self) -> Path:
39+
"""The directory where PID lock files are stored."""
40+
return self._lock_dir
41+
42+
@property
43+
def lock_path(self) -> Path:
44+
"""The full path to the lock file."""
45+
return self._lock_path
46+
47+
@property
48+
def lock(self) -> bool:
49+
"""Return the current lock acquisition state."""
50+
return self._lock_acquired
51+
52+
def __enter__(self) -> Self:
53+
"""Acquire the lock."""
54+
self.acquire()
55+
return self
56+
57+
def __exit__(
58+
self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: Any
59+
) -> None:
60+
"""Release the lock."""
61+
self.release()
62+
63+
def _generate_lock_name(self) -> Path:
64+
"""Generate a unique lock file name based on the resource path."""
65+
# SHA256 hash of the absolute path is used to ensure a unique, safe filename.
66+
path_hash = hashlib.sha256(str(self.resource_path).encode('utf-8')).hexdigest()
67+
return self._lock_dir / f'docbuild-{path_hash}.pid'
68+
69+
def acquire(self) -> None:
70+
"""Acquire the lock atomically, or diagnose an existing lock."""
71+
self.lock_dir.mkdir(parents=True, exist_ok=True)
72+
current_pid = os.getpid()
73+
74+
# 1. Attempt to acquire the lock file atomically (O_CREAT | os.O_EXCL)
75+
try:
76+
# os.O_WRONLY | os.O_CREAT | os.O_EXCL ensures file is created ONLY if it doesn't exist.
77+
# 0o644 sets the file permissions (read/write for owner, read for others).
78+
lock_fd = os.open(
79+
self.lock_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644
80+
)
81+
# Write PID to the atomically created file
82+
with os.fdopen(lock_fd, 'w') as f:
83+
f.write(str(current_pid) + '\n')
84+
85+
self._lock_acquired = True
86+
log.debug(f"Acquired lock for {self.resource_path} at PID {current_pid}.")
87+
# Registering release ensures the lock is cleaned up when the program exits.
88+
atexit.register(self.release)
89+
return
90+
91+
except FileExistsError:
92+
# 2. Lock file exists (atomic open failed). Must now check if it's stale or running.
93+
pass
94+
except OSError as e:
95+
# Catch file system errors during the atomic open operation
96+
raise RuntimeError(f"Failed to create lock file at {self.lock_path}: {e}")
97+
98+
# 3. Lock exists. Check if the process is running.
99+
if self.lock_path.exists():
100+
try:
101+
# Read PID from the file content (slower operation, only done on contention)
102+
with self.lock_path.open('r') as f:
103+
pid_str = f.read().strip()
104+
105+
pid = int(pid_str)
106+
107+
# Check if the process is actually running
108+
if self._is_pid_running(pid):
109+
# Running instance found. Raise the error.
110+
raise RuntimeError(
111+
f"docbuild instance already running (PID: {pid}) "
112+
f"for configuration: {self.resource_path}"
113+
)
114+
else:
115+
# Stale lock. Attempt to clean up and acquire again.
116+
log.warning(
117+
f"Found stale lock file at {self.lock_path} (PID {pid}). Removing and retrying acquisition."
118+
)
119+
self.lock_path.unlink()
120+
121+
# Recursively call acquire() to try and grab the lock immediately.
122+
return self.acquire()
123+
124+
except FileNotFoundError:
125+
# Race condition: another process removed the stale lock before us. Retry.
126+
return self.acquire()
127+
except ValueError:
128+
# Invalid PID format. Treat as stale and clean up.
129+
log.warning(f"Lock file at {self.lock_path} contains invalid PID. Removing and retrying.")
130+
self.lock_path.unlink()
131+
return self.acquire()
132+
except OSError as e:
133+
# Non-critical I/O error during read/unlink attempt
134+
log.error(f"Non-critical error while checking lock file: {e}")
135+
136+
# If all checks and retries fail to acquire the lock, raise a final error.
137+
raise RuntimeError(f"Failed to acquire lock for {self.resource_path} after multiple checks.")
138+
139+
def release(self) -> None:
140+
"""Release the lock file."""
141+
if self._lock_acquired and self.lock_path.exists():
142+
try:
143+
self.lock_path.unlink()
144+
self._lock_acquired = False
145+
log.debug("Released lock at %s.", self.lock_path)
146+
147+
# Unregister the cleanup function
148+
atexit.unregister(self.release)
149+
except OSError as e:
150+
log.error(f"Failed to remove lock file at {self.lock_path}: {e}")
151+
152+
@staticmethod
153+
def _is_pid_running(pid: int) -> bool:
154+
"""Check if a process with the given PID is currently running.
155+
156+
:param pid: The PID to check
157+
:return: True, if the process with the given PID is running,
158+
False otherwise
159+
"""
160+
if pid <= 0:
161+
return False
162+
try:
163+
# Sending signal 0 will raise an OSError exception if the pid is
164+
# not running. Do nothing otherwise.
165+
os.kill(pid, 0)
166+
return True
167+
except OSError as err:
168+
# Check for ESRCH (No such process)
169+
return err.errno != errno.ESRCH

0 commit comments

Comments
 (0)