Skip to content

Commit 307fc88

Browse files
committed
Improve WSL2 Docker daemon sharing and diagnostics
1 parent 6b2c4c9 commit 307fc88

4 files changed

Lines changed: 168 additions & 28 deletions

File tree

src/main/java/io/github/intisy/docker/WindowsDockerProvider.java

Lines changed: 165 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,21 @@ private void startNativeDocker() throws IOException, InterruptedException {
439439
}
440440

441441
// TCP port of the Docker endpoint this instance talks to: either a per-instance port
// derived from instanceId, or the shared port when an existing daemon is reused.
private int dockerPort;
442+
// Set to true by startWsl2Docker() when tryConnectToExistingDaemon() succeeds;
// stop() checks it to tear down only the socat forwarder instead of a dockerd process.
private boolean usingExistingDaemon = false;
443+
// Monitor guarding the static shared-daemon state below (see tryConnectToExistingDaemon()).
private static final Object EXISTING_DAEMON_LOCK = new Object();
444+
// True once a TCP forwarder to the existing daemon has been set up and verified.
private static volatile boolean existingDaemonSetup = false;
445+
// Port of the verified shared forwarder; 0 means "not set up yet".
private static volatile int sharedDaemonPort = 0;
446+
// Host/IP under which the shared forwarder was reachable; null means "not set up yet".
private static volatile String sharedWslIp = null;
442447

443448
private void startWsl2Docker() throws IOException, InterruptedException {
444449
ensureInstalled();
445450

451+
if (tryConnectToExistingDaemon()) {
452+
log.info("Connected to existing Docker daemon in WSL2");
453+
usingExistingDaemon = true;
454+
return;
455+
}
456+
446457
dockerPort = 2375 + Math.abs(instanceId.hashCode() % 1000);
447458
wslSocketPath = "tcp://0.0.0.0:" + dockerPort;
448459
String wslLogFile = "/tmp/docker-java-" + instanceId + ".log";
@@ -492,7 +503,7 @@ private void startWsl2Docker() throws IOException, InterruptedException {
492503
String isolationFlags = "";
493504
if (otherDockerdRunning) {
494505
log.info("Another Docker daemon detected, using isolation flags to avoid conflicts");
495-
isolationFlags = " --iptables=false --bridge=none";
506+
isolationFlags = " --iptables=false";
496507
}
497508

498509
log.debug("Starting dockerd directly...");
@@ -582,6 +593,147 @@ private void startWsl2Docker() throws IOException, InterruptedException {
582593
log.info("Docker daemon started in WSL2 (instance: {}, port: {})", instanceId, dockerPort);
583594
}
584595

596+
/**
597+
* Try to connect to an existing Docker daemon running in WSL2.
598+
* This checks if Docker is running, starts it if needed, and exposes it on TCP.
599+
* Uses synchronization to prevent race conditions when multiple threads call this.
600+
*/
601+
private boolean tryConnectToExistingDaemon() {
602+
synchronized (EXISTING_DAEMON_LOCK) {
603+
if (existingDaemonSetup && sharedDaemonPort > 0 && sharedWslIp != null) {
604+
dockerPort = sharedDaemonPort;
605+
wslIpAddress = sharedWslIp;
606+
log.info("Using existing Docker daemon connection on port {}", dockerPort);
607+
return true;
608+
}
609+
610+
try {
611+
String socketCheck = runWslCommand("test -S /var/run/docker.sock && echo yes || echo no", false, 5);
612+
if (!"yes".equals(socketCheck.trim())) {
613+
log.debug("Docker socket /var/run/docker.sock does not exist");
614+
615+
log.info("Docker daemon not running, attempting to start it...");
616+
String startResult = runWslCommand("sudo service docker start 2>&1", false, 30);
617+
log.debug("Docker service start result: {}", startResult);
618+
619+
Thread.sleep(3000);
620+
621+
socketCheck = runWslCommand("test -S /var/run/docker.sock && echo yes || echo no", false, 5);
622+
if (!"yes".equals(socketCheck.trim())) {
623+
log.debug("Docker socket still doesn't exist after service start");
624+
return false;
625+
}
626+
}
627+
628+
log.info("Found Docker daemon in WSL2, setting up TCP forwarding...");
629+
630+
String wslIp = runWslCommand("hostname -I | awk '{print $1}'", false, 5).trim();
631+
if (wslIp.isEmpty()) {
632+
log.info("Could not get WSL2 IP address, will use isolated daemon");
633+
return false;
634+
}
635+
wslIpAddress = wslIp;
636+
log.info("WSL2 IP: {}", wslIp);
637+
638+
String socatPath = runWslCommand("command -v socat 2>/dev/null || echo ''", false, 5).trim();
639+
if (socatPath.isEmpty()) {
640+
log.info("Installing socat for TCP forwarding...");
641+
runWslCommand("sudo apt-get update -qq && sudo apt-get install -y -qq socat 2>&1", false, 120);
642+
socatPath = runWslCommand("command -v socat 2>/dev/null || echo ''", false, 5).trim();
643+
if (socatPath.isEmpty()) {
644+
log.info("Failed to install socat, will use isolated daemon");
645+
return false;
646+
}
647+
}
648+
649+
dockerPort = 2375;
650+
651+
runWslCommand("sudo pkill -9 -f 'socat.*:" + dockerPort + "' 2>/dev/null; sleep 1", false, 10);
652+
653+
String socketPerms = runWslCommand("ls -la /var/run/docker.sock 2>&1", false, 5);
654+
log.info("Docker socket: {}", socketPerms.trim());
655+
656+
String socatCmd = String.format(
657+
"sudo nohup socat TCP-LISTEN:%d,bind=0.0.0.0,reuseaddr,fork UNIX-CONNECT:/var/run/docker.sock </dev/null >/tmp/socat-%d.log 2>&1 &",
658+
dockerPort, dockerPort);
659+
runWslCommand(socatCmd, false, 5);
660+
661+
Thread.sleep(2000);
662+
663+
String socatPid = runWslCommand("pgrep -f 'socat.*:" + dockerPort + "' 2>/dev/null | head -1 || echo ''", false, 5).trim();
664+
if (socatPid.isEmpty()) {
665+
String socatLog = runWslCommand("cat /tmp/socat-" + dockerPort + ".log 2>/dev/null | head -20 || echo '(no log)'", false, 5);
666+
log.info("socat failed to start. Log: {}", socatLog);
667+
return false;
668+
}
669+
log.info("socat running with PID: {}", socatPid);
670+
671+
String listenCheck = runWslCommand("ss -tlnp 2>/dev/null | grep ':" + dockerPort + " ' || echo 'not listening'", false, 5);
672+
log.info("Port {} status: {}", dockerPort, listenCheck.trim());
673+
674+
boolean connected = testDockerConnection(wslIp, dockerPort);
675+
if (!connected) {
676+
connected = testDockerConnection("localhost", dockerPort);
677+
if (connected) {
678+
wslIpAddress = "localhost";
679+
}
680+
}
681+
682+
if (connected) {
683+
sharedDaemonPort = dockerPort;
684+
sharedWslIp = wslIpAddress;
685+
existingDaemonSetup = true;
686+
log.info("Connected to Docker daemon via TCP at {}:{}", wslIpAddress, dockerPort);
687+
return true;
688+
}
689+
690+
String socatLog = runWslCommand("cat /tmp/socat-" + dockerPort + ".log 2>/dev/null | tail -10 || echo '(no log)'", false, 5);
691+
log.info("Connection failed, socat log: {}", socatLog);
692+
return false;
693+
} catch (Exception e) {
694+
log.info("Failed to connect to existing daemon: {}", e.getMessage());
695+
return false;
696+
}
697+
}
698+
}
699+
700+
/**
701+
* Test Docker connection from Windows by attempting a TCP socket connection and HTTP request.
702+
*/
703+
private boolean testDockerConnection(String host, int port) {
704+
try {
705+
log.info("Testing Docker connection to {}:{}...", host, port);
706+
707+
java.net.Socket socket = new java.net.Socket();
708+
socket.connect(new java.net.InetSocketAddress(host, port), 3000);
709+
socket.close();
710+
log.info("TCP socket connection successful to {}:{}", host, port);
711+
712+
java.net.URL url = new java.net.URL("http://" + host + ":" + port + "/version");
713+
java.net.HttpURLConnection conn = (java.net.HttpURLConnection) url.openConnection();
714+
conn.setConnectTimeout(3000);
715+
conn.setReadTimeout(3000);
716+
int responseCode = conn.getResponseCode();
717+
conn.disconnect();
718+
719+
if (responseCode == 200) {
720+
log.info("Docker API responding at {}:{}", host, port);
721+
return true;
722+
}
723+
log.info("Docker API returned HTTP {} at {}:{}", responseCode, host, port);
724+
return false;
725+
} catch (java.net.ConnectException e) {
726+
log.info("Connection refused to {}:{} - socat may not be forwarding correctly", host, port);
727+
return false;
728+
} catch (java.net.SocketTimeoutException e) {
729+
log.info("Connection timeout to {}:{}", host, port);
730+
return false;
731+
} catch (IOException e) {
732+
log.info("Connection test failed for {}:{} - {}: {}", host, port, e.getClass().getSimpleName(), e.getMessage());
733+
return false;
734+
}
735+
}
736+
585737
/**
586738
* Check if passwordless sudo is available for dockerd.
587739
*/
@@ -771,6 +923,18 @@ public DockerClient getClient() {
771923
public void stop() {
772924
log.info("Stopping Docker daemon (instance: {})...", instanceId);
773925

926+
if (usingExistingDaemon && usingWsl2 && wslDistro != null) {
927+
try {
928+
ProcessBuilder pb = new ProcessBuilder("wsl", "-d", wslDistro, "-e", "bash", "-c",
929+
"sudo -n pkill -f 'socat.*:" + dockerPort + "' 2>/dev/null || true");
930+
pb.start().waitFor(5, TimeUnit.SECONDS);
931+
log.info("Stopped socat TCP forwarder");
932+
} catch (IOException | InterruptedException e) {
933+
log.debug("Failed to stop socat: {}", e.getMessage());
934+
}
935+
return;
936+
}
937+
774938
if (dockerProcess != null) {
775939
if (usingWsl2 && wslDistro != null) {
776940
try {

src/main/java/io/github/intisy/docker/transport/DockerHttpClient.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ public DockerResponse post(String path) throws IOException {
6868
public DockerResponse post(String path, Map<String, String> queryParams, Object body) throws IOException {
6969
String fullPath = buildPathWithQuery(path, queryParams);
7070
String jsonBody = body != null ? gson.toJson(body) : null;
71+
if (jsonBody != null && path.contains("/containers/create")) {
72+
log.info("Creating container with JSON: {}", jsonBody);
73+
}
7174
return request("POST", fullPath, jsonBody);
7275
}
7376

src/test/java/io/github/intisy/docker/NvidiaToolkitTest.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ static void setup() throws IOException, InterruptedException {
3636
log.info("=== NVIDIA Container Toolkit Test Setup ===");
3737
provider = new WindowsDockerProvider();
3838

39-
// Try to start the provider to initialize WSL2
4039
try {
4140
provider.start();
4241
hasWsl2 = true;
@@ -66,7 +65,6 @@ void testIsWindows() {
6665
log.info("OS: {}", System.getProperty("os.name"));
6766
log.info("Is Windows: {}", isWindows);
6867

69-
// This test just logs info, doesn't fail
7068
assertTrue(true);
7169
}
7270

@@ -100,7 +98,6 @@ void testNvidiaGpuDetection() {
10098
log.info("No NVIDIA GPU detected. This is expected if you don't have an NVIDIA GPU.");
10199
}
102100

103-
// Test passes regardless - we're just checking detection works
104101
assertTrue(true);
105102
}
106103

@@ -121,7 +118,6 @@ void testNvidiaToolkitInstallationStatus() {
121118
log.info("It will be installed automatically when ensureNvidiaContainerToolkit() is called.");
122119
}
123120

124-
// Test passes regardless - we're just checking detection works
125121
assertTrue(true);
126122
}
127123

@@ -142,7 +138,6 @@ void testEnsureNvidiaContainerToolkit() {
142138
if (!gpuAvailable) {
143139
log.info("No NVIDIA GPU detected. ensureNvidiaContainerToolkit() will do nothing.");
144140

145-
// Should not throw, just return silently
146141
assertDoesNotThrow(() -> provider.ensureNvidiaContainerToolkit());
147142
log.info("ensureNvidiaContainerToolkit() completed (no-op, no GPU)");
148143
return;
@@ -151,13 +146,11 @@ void testEnsureNvidiaContainerToolkit() {
151146
if (toolkitInstalledBefore) {
152147
log.info("Toolkit already installed. ensureNvidiaContainerToolkit() will do nothing.");
153148

154-
// Should not throw, just return silently
155149
assertDoesNotThrow(() -> provider.ensureNvidiaContainerToolkit());
156150
log.info("ensureNvidiaContainerToolkit() completed (no-op, already installed)");
157151
return;
158152
}
159153

160-
// GPU available but toolkit not installed - will attempt installation
161154
log.info("GPU available but toolkit not installed. Will attempt automatic installation...");
162155
log.info("This may take a few minutes...");
163156

@@ -174,7 +167,6 @@ void testEnsureNvidiaContainerToolkit() {
174167
} catch (IOException e) {
175168
String message = e.getMessage();
176169

177-
// If it's a prerequisite issue (passwordless sudo not set up), skip the test with instructions
178170
if (message != null && message.contains("Passwordless sudo")) {
179171
log.warn("=== ONE-TIME SETUP REQUIRED ===");
180172
log.warn("Run these commands in WSL to enable automatic NVIDIA toolkit installation:");
@@ -187,11 +179,9 @@ void testEnsureNvidiaContainerToolkit() {
187179
log.warn("Then run this test again.");
188180
log.warn("================================");
189181

190-
// Skip the test instead of failing - prerequisite not met
191182
assumeTrue(false, "Skipping: Passwordless sudo not configured. See instructions above.");
192183
}
193184

194-
// For other errors, fail the test
195185
log.error("Failed to install NVIDIA Container Toolkit: {}", message);
196186
log.error("This might be due to network issues.");
197187
log.error("You can install manually by running the commands shown in the error message.");
@@ -212,7 +202,6 @@ void testGpuContainerCreation() {
212202

213203
DockerClient client = provider.getClient();
214204

215-
// Try to pull and run nvidia-smi in a container
216205
try {
217206
log.info("Pulling nvidia/cuda:12.0.0-base-ubuntu22.04 image...");
218207
client.pullImage("nvidia/cuda:12.0.0-base-ubuntu22.04").exec(10, java.util.concurrent.TimeUnit.MINUTES);
@@ -228,26 +217,21 @@ void testGpuContainerCreation() {
228217
String containerId = response.getId();
229218
log.info("Container created: {}", containerId);
230219

231-
// Start the container
232220
client.startContainer(containerId).exec();
233221
log.info("Container started");
234222

235-
// Wait for it to finish
236223
client.waitContainer(containerId).exec();
237224

238-
// Get logs
239225
String logs = client.logs(containerId)
240226
.withStdout(true)
241227
.withStderr(true)
242228
.exec();
243229

244230
log.info("Container output:\n{}", logs);
245231

246-
// Cleanup
247232
client.removeContainer(containerId).exec();
248233
log.info("Container removed");
249234

250-
// Check if nvidia-smi output looks valid
251235
assertTrue(logs.contains("NVIDIA") || logs.contains("GPU"),
252236
"nvidia-smi output should contain GPU information");
253237

0 commit comments

Comments
 (0)