From 53ee7bb1339075f56634999f7789a0f35ac96c7b Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Mon, 16 Mar 2026 11:27:16 +0100 Subject: [PATCH 1/3] init: embed a simple DHCP client If there's an eth0 interface present, and one of the interfaces has been configured with the newly introduced NET_FLAG_DHCP_CLIENT flag, the embedded dhcp client will try to obtain an address. For 2.x we should be more flexible and actually allow both dhcp and static IP configuration on specific interfaces (identified by their MAC addresses, since ifnames aren't deterministic), but this should be good enough for 1.x. Signed-off-by: Sergio Lopez --- include/libkrun.h | 3 +- init/dhcp.c | 524 +++++++++++++++++++++++++++++++++++++++ init/dhcp.h | 59 +++++ init/init.c | 19 ++ src/devices/build.rs | 7 + src/libkrun/src/lib.rs | 24 +- src/vmm/src/builder.rs | 5 + src/vmm/src/resources.rs | 3 + 8 files changed, 638 insertions(+), 6 deletions(-) create mode 100644 init/dhcp.c create mode 100644 init/dhcp.h diff --git a/include/libkrun.h b/include/libkrun.h index 651c46ea5..2831e940d 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -354,7 +354,8 @@ int32_t krun_add_virtiofs3(uint32_t ctx_id, /* Send the VFKIT magic after establishing the connection, as required by gvproxy in vfkit mode. */ -#define NET_FLAG_VFKIT 1 << 0 +#define NET_FLAG_VFKIT (1 << 0) +#define NET_FLAG_DHCP_CLIENT (1 << 1) /* TSI (Transparent Socket Impersonation) feature flags for vsock */ #define KRUN_TSI_HIJACK_INET (1 << 0) diff --git a/init/dhcp.c b/init/dhcp.c new file mode 100644 index 000000000..b7e431042 --- /dev/null +++ b/init/dhcp.c @@ -0,0 +1,524 @@ +/* + * DHCP Client Implementation + * + * Standalone DHCP client for configuring IPv4 network interfaces.
+ * Translated from Rust implementation in muvm/src/guest/net.rs + */ + +#include "dhcp.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DHCP_BUFFER_SIZE 576 + +/* Helper function to send netlink message */ +static int nl_send(int sock, struct nlmsghdr *nlh) +{ + struct sockaddr_nl sa = { + .nl_family = AF_NETLINK, + }; + + struct iovec iov = { + .iov_base = nlh, + .iov_len = nlh->nlmsg_len, + }; + + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + return sendmsg(sock, &msg, 0); +} + +/* Helper function to receive netlink response */ +static int nl_recv(int sock, char *buf, size_t len) +{ + struct sockaddr_nl sa; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + return recvmsg(sock, &msg, 0); +} + +/* Add routing attribute to netlink message */ +static void add_rtattr(struct nlmsghdr *nlh, int type, const void *data, + int len) +{ + int rtalen = RTA_SPACE(len); + struct rtattr *rta = + (struct rtattr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + memcpy(RTA_DATA(rta), data, len); + nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + rtalen; +} + +/* Set MTU */ +static int set_mtu(int nl_sock, int iface_index, unsigned int mtu) +{ + char buf[4096]; + struct nlmsghdr *nlh; + struct nlmsgerr *err; + struct ifinfomsg *ifi; + + memset(buf, 0, sizeof(buf)); + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + nlh->nlmsg_type = RTM_NEWLINK; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = 1; + nlh->nlmsg_pid = getpid(); + + ifi = (struct ifinfomsg *)NLMSG_DATA(nlh); + ifi->ifi_family = AF_UNSPEC; + ifi->ifi_type = ARPHRD_ETHER; + 
ifi->ifi_index = iface_index; + + add_rtattr(nlh, IFLA_MTU, &mtu, sizeof(mtu)); + + if (nl_send(nl_sock, nlh) < 0) { + perror("nl_send failed for set_mtu"); + return -1; + } + + /* Receive ACK */ + int len = nl_recv(nl_sock, buf, sizeof(buf)); + if (len < (int)NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + perror("nl_recv failed for set_mtu"); + return -1; + } + + if (nlh->nlmsg_type != NLMSG_ERROR) { + printf("netlink didn't return a valid answer for set_mtu\n"); + return -1; + } + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + if (err->error != 0) { + printf("netlink returned an error for set_mtu: %d\n", err->error); + return -1; + } + + return 0; +} + +/* Add or delete IPv4 route */ +static int mod_route4(int nl_sock, int iface_index, int cmd, struct in_addr gw) +{ + char buf[4096]; + struct nlmsghdr *nlh; + struct nlmsgerr *err; + struct rtmsg *rtm; + struct in_addr dst = {.s_addr = INADDR_ANY}; + + memset(buf, 0, sizeof(buf)); + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + nlh->nlmsg_type = cmd; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; + nlh->nlmsg_seq = 1; + nlh->nlmsg_pid = getpid(); + + rtm = (struct rtmsg *)NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = 0; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = RTPROT_BOOT; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + + add_rtattr(nlh, RTA_OIF, &iface_index, sizeof(iface_index)); + add_rtattr(nlh, RTA_DST, &dst, sizeof(dst)); + add_rtattr(nlh, RTA_GATEWAY, &gw, sizeof(gw)); + + if (nl_send(nl_sock, nlh) < 0) { + perror("nl_send failed for mod_route4"); + return -1; + } + + /* Receive ACK */ + int len = nl_recv(nl_sock, buf, sizeof(buf)); + if (len < (int)NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + perror("nl_recv failed for mod_route4"); + return -1; + } + + if (nlh->nlmsg_type != NLMSG_ERROR) { + printf("netlink didn't return a valid 
answer for mod_route4\n"); + return -1; + } + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + if (err->error != 0) { + printf("netlink returned an error for mod_route4: %d\n", err->error); + return -1; + } + + return 0; +} + +/* Add or delete IPv4 address */ +static int mod_addr4(int nl_sock, int iface_index, int cmd, struct in_addr addr, + unsigned char prefix_len) +{ + char buf[4096]; + struct nlmsghdr *nlh; + struct nlmsgerr *err; + struct ifaddrmsg *ifa; + + memset(buf, 0, sizeof(buf)); + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)); + nlh->nlmsg_type = cmd; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; + nlh->nlmsg_seq = 1; + nlh->nlmsg_pid = getpid(); + + ifa = (struct ifaddrmsg *)NLMSG_DATA(nlh); + ifa->ifa_family = AF_INET; + ifa->ifa_prefixlen = prefix_len; + ifa->ifa_flags = 0; + ifa->ifa_scope = RT_SCOPE_UNIVERSE; + ifa->ifa_index = iface_index; + + add_rtattr(nlh, IFA_LOCAL, &addr, sizeof(addr)); + add_rtattr(nlh, IFA_ADDRESS, &addr, sizeof(addr)); + + if (nl_send(nl_sock, nlh) < 0) { + perror("nl_send failed for mod_addr4"); + return -1; + } + + /* Receive ACK */ + int len = nl_recv(nl_sock, buf, sizeof(buf)); + if (len < (int)NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + perror("nl_recv failed for mod_addr4"); + return -1; + } + + if (nlh->nlmsg_type != NLMSG_ERROR) { + printf("netlink didn't return a valid answer for mod_addr4\n"); + return -1; + } + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + if (err->error != 0) { + printf("netlink returned an error for mod_addr4: %d\n", err->error); + return -1; + } + + return 0; +} + +/* Count leading ones in a 32-bit value */ +static unsigned char count_leading_ones(uint32_t val) +{ + unsigned char count = 0; + for (int i = 31; i >= 0; i--) { + if (val & (1U << i)) { + count++; + } else { + break; + } + } + return count; +} + +/* Send DISCOVER with Rapid Commit, process ACK, configure address and route */ +int do_dhcp(const char *iface) +{ + struct 
sockaddr_in bind_addr, dest_addr; + struct dhcp_packet request = {0}; + unsigned char response[DHCP_BUFFER_SIZE]; + struct timeval timeout; + int iface_index; + int broadcast = 1; + int nl_sock = -1; + int sock = -1; + int ret = -1; + + iface_index = if_nametoindex(iface); + if (iface_index == 0) { + perror("Failed to find index for network interface"); + return ret; + } + + nl_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (nl_sock < 0) { + perror("Failed to create netlink socket"); + return ret; + } + + struct sockaddr_nl sa = { + .nl_family = AF_NETLINK, + .nl_pid = getpid(), + .nl_groups = 0, + }; + + if (bind(nl_sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + perror("Failed to bind netlink socket"); + goto cleanup; + } + + /* Temporary link-local address and route avoid the need for raw sockets */ + struct in_addr temp_addr; + inet_pton(AF_INET, "169.254.1.1", &temp_addr); + struct in_addr temp_gw = {.s_addr = INADDR_ANY}; + + if (mod_route4(nl_sock, iface_index, RTM_NEWROUTE, temp_gw) != 0) { + printf("couldn't add temporary route\n"); + goto cleanup; + } + if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, temp_addr, 16) != 0) { + printf("couldn't add temporary address\n"); + goto cleanup; + } + + /* Send request (DHCPDISCOVER) */ + sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock < 0) { + perror("socket failed"); + goto cleanup; + } + + /* Allow broadcast */ + if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &broadcast, + sizeof(broadcast)) < 0) { + perror("setsockopt SO_BROADCAST failed"); + goto cleanup; + } + + /* Bind to port 68 (DHCP client) */ + memset(&bind_addr, 0, sizeof(bind_addr)); + bind_addr.sin_family = AF_INET; + bind_addr.sin_port = htons(68); + bind_addr.sin_addr.s_addr = INADDR_ANY; + + if (bind(sock, (struct sockaddr *)&bind_addr, sizeof(bind_addr)) < 0) { + perror("bind failed"); + goto cleanup; + } + + request.op = 1; /* BOOTREQUEST */ + request.htype = 1; /* Hardware address type: Ethernet */ + request.hlen = 6; /* 
Hardware address length */ + request.hops = 0; /* DHCP relay Hops */ + request.xid = + htonl(getpid()); /* Transaction ID: use PID for some randomness */ + request.secs = + 0; /* Seconds elapsed since beginning of acquisition or renewal */ + request.flags = htons(0x8000); /* DHCP message flags: Broadcast */ + request.ciaddr = 0; /* Client IP address (not set yet) */ + request.yiaddr = 0; /* 'your' IP address (server will fill) */ + request.siaddr = 0; /* Server IP address (not set) */ + request.giaddr = 0; /* Relay agent IP address (not set) */ + request.magic = htonl(0x63825363); /* Magic cookie */ + + /* Populate chaddr with the interface's MAC address */ + struct ifreq mac_ifr; + memset(&mac_ifr, 0, sizeof(mac_ifr)); + strncpy(mac_ifr.ifr_name, iface, IFNAMSIZ); + + if (ioctl(sock, SIOCGIFHWADDR, &mac_ifr) < 0) { + perror("ioctl(SIOCGIFHWADDR) failed"); + goto cleanup; + } + memcpy(request.chaddr, mac_ifr.ifr_hwaddr.sa_data, 6); + + /* Build DHCP options */ + int opt_offset = 0; + + /* Option 53: DHCP Message Type = DISCOVER (1) */ + request.options[opt_offset++] = 53; + request.options[opt_offset++] = 1; + request.options[opt_offset++] = 1; + + /* Option 80: Rapid Commit (RFC 4039) */ + request.options[opt_offset++] = 80; + request.options[opt_offset++] = 0; + + /* Option 255: End of options */ + request.options[opt_offset++] = 0xff; + + /* Remaining bytes are padding (up to 300 bytes) */ + + /* Send DHCP DISCOVER */ + memset(&dest_addr, 0, sizeof(dest_addr)); + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = htons(67); + dest_addr.sin_addr.s_addr = INADDR_BROADCAST; + + if (sendto(sock, &request, sizeof(request), 0, + (struct sockaddr *)&dest_addr, sizeof(dest_addr)) < 0) { + perror("sendto failed"); + goto cleanup; + } + + /* Keep IPv6-only fast: set receive timeout to 100ms */ + timeout.tv_sec = 0; + timeout.tv_usec = 100000; + if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < + 0) { + perror("setsockopt SO_RCVTIMEO failed"); 
+ goto cleanup; + } + + /* Get and process response (DHCPACK) if any */ + struct sockaddr_in from_addr; + socklen_t from_len = sizeof(from_addr); + ssize_t len = recvfrom(sock, response, sizeof(response), 0, + (struct sockaddr *)&from_addr, &from_len); + + close(sock); + sock = -1; + + if (len > 0) { + /* Parse DHCP response */ + struct in_addr addr; + /* yiaddr is at offset 16-19 in network byte order */ + memcpy(&addr.s_addr, &response[16], sizeof(addr.s_addr)); + + struct in_addr netmask = {.s_addr = INADDR_ANY}; + struct in_addr router = {.s_addr = INADDR_ANY}; + /* Clamp MTU to passt's limit */ + uint16_t mtu = 65520; + + FILE *resolv = fopen("/etc/resolv.conf", "w"); + if (!resolv) { + perror("Failed to open /etc/resolv.conf"); + } + + /* Parse DHCP options (start at offset 240 after magic cookie) */ + size_t p = 240; + while (p < (size_t)len) { + unsigned char opt = response[p]; + + if (opt == 0xff) { + /* Option 255: End (of options) */ + break; + } + + if (opt == 0) { /* Padding */ + p++; + continue; + } + + unsigned char opt_len = response[p + 1]; + p += 2; /* Length doesn't include code and length field itself */ + + if (p + opt_len > (size_t)len) { + /* Malformed packet, option length exceeds packet boundary */ + break; + } + + if (opt == 1) { + /* Option 1: Subnet Mask */ + memcpy(&netmask.s_addr, &response[p], sizeof(netmask.s_addr)); + } else if (opt == 3) { + /* Option 3: Router */ + memcpy(&router.s_addr, &response[p], sizeof(router.s_addr)); + } else if (opt == 6) { + /* Option 6: Domain Name Server */ + if (resolv) { + for (int dns_p = p; dns_p + 3 < p + opt_len; dns_p += 4) { + fprintf(resolv, "nameserver %d.%d.%d.%d\n", + response[dns_p], response[dns_p + 1], + response[dns_p + 2], response[dns_p + 3]); + } + } + } else if (opt == 26) { + /* Option 26: Interface MTU */ + mtu = (response[p] << 8) | response[p + 1]; + + /* We don't know yet if IPv6 is available: don't go below 1280 B + */ + if (mtu < 1280) + mtu = 1280; + if (mtu > 65520) + mtu = 
65520; + } + + p += opt_len; + } + + if (resolv) { + fclose(resolv); + } + + /* Calculate prefix length from netmask */ + unsigned char prefix_len = count_leading_ones(ntohl(netmask.s_addr)); + + /* Drop temporary address and route, configure what we got instead */ + if (mod_route4(nl_sock, iface_index, RTM_DELROUTE, temp_gw) != 0) { + printf("couldn't remove temporary route\n"); + goto cleanup; + } + if (mod_addr4(nl_sock, iface_index, RTM_DELADDR, temp_addr, 16) != 0) { + printf("couldn't remove temporary address\n"); + goto cleanup; + } + + if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, addr, prefix_len) != + 0) { + printf("couldn't add the address provided by the DHCP server\n"); + goto cleanup; + } + if (mod_route4(nl_sock, iface_index, RTM_NEWROUTE, router) != 0) { + printf( + "couldn't add the default route provided by the DHCP server\n"); + goto cleanup; + } + + set_mtu(nl_sock, iface_index, mtu); + } else { + /* Clean up: we're clearly too cool for IPv4 */ + if (mod_route4(nl_sock, iface_index, RTM_DELROUTE, temp_gw) != 0) { + printf("couldn't remove temporary route\n"); + } + if (mod_addr4(nl_sock, iface_index, RTM_DELADDR, temp_addr, 16) != 0) { + printf("couldn't remove temporary address\n"); + } + } + + ret = 0; + +cleanup: + if (sock >= 0) { + close(sock); + } + if (nl_sock >= 0) { + close(nl_sock); + } + return ret; +} diff --git a/init/dhcp.h b/init/dhcp.h new file mode 100644 index 000000000..2a4abfb1a --- /dev/null +++ b/init/dhcp.h @@ -0,0 +1,59 @@ +/* + * DHCP Client Implementation + * + * Standalone DHCP client for configuring IPv4 network interfaces. 
+ * Translated from Rust implementation in muvm/src/guest/net.rs + */ + +#ifndef DHCP_H +#define DHCP_H + +#include + +/* BOOTP vendor-specific area size (64) - magic cookie (4) */ +#define DHCP_OPTIONS_SIZE 60 + +/* DHCP packet structure (RFC 2131) */ +struct dhcp_packet { + uint8_t op; /* Message op code / message type (1 = BOOTREQUEST) */ + uint8_t htype; /* Hardware address type (1 = Ethernet) */ + uint8_t hlen; /* Hardware address length (6 for Ethernet) */ + uint8_t hops; /* Client sets to zero */ + uint32_t xid; /* Transaction ID */ + uint16_t secs; /* Seconds elapsed since client began address acquisition */ + uint16_t flags; /* Flags (0x8000 = Broadcast) */ + uint32_t ciaddr; /* Client IP address */ + uint32_t yiaddr; /* 'your' (client) IP address */ + uint32_t siaddr; /* IP address of next server to use in bootstrap */ + uint32_t giaddr; /* Relay agent IP address */ + uint8_t chaddr[16]; /* Client hardware address */ + uint8_t sname[64]; /* Optional server host name */ + uint8_t file[128]; /* Boot file name */ + uint32_t magic; /* Magic cookie (0x63825363) */ + uint8_t options[DHCP_OPTIONS_SIZE]; /* Options field */ +} __attribute__((packed)); + +/* + * Perform DHCP discovery and configuration for a network interface + * + * This function: + * 1. Sets up a temporary link-local address (169.254.1.1/16) + * 2. Sends a DHCP DISCOVER message with Rapid Commit option + * 3. Waits up to 100ms for a DHCP ACK response + * 4. Parses the response and configures: + * - IPv4 address with appropriate prefix length + * - Default gateway route + * - DNS servers (overwriting /etc/resolv.conf) + * - Interface MTU + * 5. Cleans up temporary configuration + * + * Parameters: + * iface - The name of the network interface to be configured. 
+ * + * Returns: + * 0 on success (whether or not DHCP response was received) + * -1 on error + */ +int do_dhcp(const char *iface); + +#endif /* DHCP_H */ diff --git a/init/init.c b/init/init.c index 2f1e74af5..54b75ae34 100644 --- a/init/init.c +++ b/init/init.c @@ -32,6 +32,7 @@ #include #endif +#include "dhcp.h" #include "jsmn.h" #ifdef SEV @@ -1183,6 +1184,7 @@ int main(int argc, char **argv) char *krun_home; char *krun_term; char *krun_init; + char *krun_dhcp; #if __linux__ int fd; char *krun_root; @@ -1297,6 +1299,23 @@ int main(int argc, char **argv) strncpy(ifr.ifr_name, "lo", IFNAMSIZ); ifr.ifr_flags |= IFF_UP; ioctl(sockfd, SIOCSIFFLAGS, &ifr); + + krun_dhcp = getenv("KRUN_DHCP"); + if (krun_dhcp && strcmp(krun_dhcp, "1") == 0) { + memset(&ifr, 0, sizeof ifr); + strncpy(ifr.ifr_name, "eth0", IFNAMSIZ); + if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) == 0) { + /* eth0 exists, bring it up first */ + ifr.ifr_flags |= IFF_UP; + ioctl(sockfd, SIOCSIFFLAGS, &ifr); + + /* Configure eth0 with DHCP */ + if (do_dhcp("eth0") != 0) { + printf("Warning: DHCP configuration for eth0 failed\n"); + } + } + } + close(sockfd); } diff --git a/src/devices/build.rs b/src/devices/build.rs index 0d5cc0c97..49a4346d2 100644 --- a/src/devices/build.rs +++ b/src/devices/build.rs @@ -6,6 +6,7 @@ fn build_default_init() -> PathBuf { let manifest_dir = PathBuf::from(std::env::var_os("CARGO_MANIFEST_DIR").unwrap()); let libkrun_root = manifest_dir.join("../.."); let init_src = libkrun_root.join("init/init.c"); + let dhcp_src = libkrun_root.join("init/dhcp.c"); let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); let init_bin = out_dir.join("init"); @@ -14,10 +15,15 @@ fn build_default_init() -> PathBuf { println!("cargo:rerun-if-env-changed=CC"); println!("cargo:rerun-if-env-changed=TIMESYNC"); println!("cargo:rerun-if-changed={}", init_src.display()); + println!("cargo:rerun-if-changed={}", dhcp_src.display()); println!( "cargo:rerun-if-changed={}", 
libkrun_root.join("init/jsmn.h").display() ); + println!( + "cargo:rerun-if-changed={}", + libkrun_root.join("init/dhcp.h").display() + ); let mut init_cc_flags = vec!["-O2", "-static", "-Wall"]; if std::env::var_os("TIMESYNC").as_deref() == Some(OsStr::new("1")) { @@ -35,6 +41,7 @@ fn build_default_init() -> PathBuf { .arg("-o") .arg(&init_bin) .arg(&init_src) + .arg(&dhcp_src) .status() .unwrap_or_else(|e| panic!("failed to execute {cc}: {e}")); diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 4417d47c7..d42057c4b 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -898,6 +898,10 @@ pub unsafe extern "C" fn krun_set_data_disk(ctx_id: u32, c_disk_path: *const c_c */ #[cfg(feature = "net")] const NET_FLAG_VFKIT: u32 = 1 << 0; +#[cfg(feature = "net")] +const NET_FLAG_DHCP_CLIENT: u32 = 1 << 1; +#[cfg(feature = "net")] +const NET_FLAG_ALL: u32 = NET_FLAG_VFKIT | NET_FLAG_DHCP_CLIENT; /* Taken from uapi/linux/virtio_net.h */ #[cfg(feature = "net")] @@ -976,10 +980,10 @@ pub unsafe extern "C" fn krun_add_net_unixstream( Err(_) => return -libc::EINVAL, }; - /* The unixstream backend doesn't support any flags */ - if flags != 0 { + if (flags & !NET_FLAG_DHCP_CLIENT) != 0 { return -libc::EINVAL; } + let enable_dhcp_client: bool = flags & NET_FLAG_DHCP_CLIENT != 0; if (features & !NET_ALL_FEATURES) != 0 { return -libc::EINVAL; @@ -989,6 +993,9 @@ pub unsafe extern "C" fn krun_add_net_unixstream( Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); create_virtio_net(cfg, backend, mac, features); + if enable_dhcp_client { + cfg.vmr.dhcp_client = true; + } } Entry::Vacant(_) => return -libc::ENOENT, } @@ -1031,10 +1038,11 @@ pub unsafe extern "C" fn krun_add_net_unixgram( return -libc::EINVAL; } - if (flags & !NET_FLAG_VFKIT) != 0 { + if (flags & !NET_FLAG_ALL) != 0 { return -libc::EINVAL; } let send_vfkit_magic: bool = flags & NET_FLAG_VFKIT != 0; + let enable_dhcp_client: bool = flags & NET_FLAG_DHCP_CLIENT != 0; let backend = if 
let Some(path) = path { VirtioNetBackend::UnixgramPath(path, send_vfkit_magic) @@ -1046,6 +1054,9 @@ pub unsafe extern "C" fn krun_add_net_unixgram( Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); create_virtio_net(cfg, backend, mac, features); + if enable_dhcp_client { + cfg.vmr.dhcp_client = true; + } } Entry::Vacant(_) => return -libc::ENOENT, } @@ -1086,15 +1097,18 @@ pub unsafe extern "C" fn krun_add_net_tap( return -libc::EINVAL; } - /* The tap backend doesn't support any flags */ - if flags != 0 { + if (flags & !NET_FLAG_DHCP_CLIENT) != 0 { return -libc::EINVAL; } + let enable_dhcp_client: bool = flags & NET_FLAG_DHCP_CLIENT != 0; match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); create_virtio_net(cfg, VirtioNetBackend::Tap(tap_name), mac, features); + if enable_dhcp_client { + cfg.vmr.dhcp_client = true; + } } Entry::Vacant(_) => return -libc::ENOENT, } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1c94f8b36..7e57774af 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1059,6 +1059,11 @@ pub fn build_microvm( #[cfg(feature = "net")] attach_net_devices(&mut vmm, &vm_resources.net, intc.clone())?; + #[cfg(feature = "net")] + if vm_resources.dhcp_client { + vmm.kernel_cmdline.insert_str("KRUN_DHCP=1")?; + } + #[cfg(feature = "snd")] if vm_resources.snd_device { attach_snd_device(&mut vmm, intc.clone())?; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 3f7e2e146..3d3246191 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -189,6 +189,8 @@ pub struct VmResources { pub serial_consoles: Vec, /// Virtio consoles to attach to the guest pub virtio_consoles: Vec, + /// Enable the embedded dhcp client in init.c + pub dhcp_client: bool, } impl VmResources { @@ -433,6 +435,7 @@ mod tests { serial_consoles: Vec::new(), virtio_consoles: Vec::new(), kernel_console: None, + dhcp_client: false, } } From 
64bf16b0d29af6e28cc48425b3519bf0ec178c55 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Mon, 23 Mar 2026 18:16:26 +0100 Subject: [PATCH 2/3] init: use SO_BINDTODEVICE instead of temp address for DHCP Replace the temporary link-local address (169.254.1.1) workaround with SO_BINDTODEVICE. The temp address caused the kernel to use 169.254.1.1 as the source IP in DHCP packets; gvproxy then tried to reply to that address and failed with "no route to host". With this change the source IP should be 0.0.0.0, which is what RFC 2131 requires for DHCPDISCOVER. Signed-off-by: Matej Hrica --- init/dhcp.c | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/init/dhcp.c b/init/dhcp.c index b7e431042..785c4e36d 100644 --- a/init/dhcp.c +++ b/init/dhcp.c @@ -291,20 +291,6 @@ int do_dhcp(const char *iface) goto cleanup; } - /* Temporary link-local address and route avoid the need for raw sockets */ - struct in_addr temp_addr; - inet_pton(AF_INET, "169.254.1.1", &temp_addr); - struct in_addr temp_gw = {.s_addr = INADDR_ANY}; - - if (mod_route4(nl_sock, iface_index, RTM_NEWROUTE, temp_gw) != 0) { - printf("couldn't add temporary route\n"); - goto cleanup; - } - if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, temp_addr, 16) != 0) { - printf("couldn't add temporary address\n"); - goto cleanup; - } - /* Send request (DHCPDISCOVER) */ sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); if (sock < 0) { @@ -319,6 +305,12 @@ int do_dhcp(const char *iface) goto cleanup; } + if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, iface, + strlen(iface) + 1) < 0) { + perror("setsockopt SO_BINDTODEVICE failed"); + goto cleanup; + } + /* Bind to port 68 (DHCP client) */ memset(&bind_addr, 0, sizeof(bind_addr)); bind_addr.sin_family = AF_INET; @@ -479,16 +471,6 @@ int do_dhcp(const char *iface) /* Calculate prefix length from netmask */ unsigned char prefix_len = count_leading_ones(ntohl(netmask.s_addr)); - /* Drop temporary address and route, 
configure what we got instead */ - if (mod_route4(nl_sock, iface_index, RTM_DELROUTE, temp_gw) != 0) { - printf("couldn't remove temporary route\n"); - goto cleanup; - } - if (mod_addr4(nl_sock, iface_index, RTM_DELADDR, temp_addr, 16) != 0) { - printf("couldn't remove temporary address\n"); - goto cleanup; - } - if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, addr, prefix_len) != 0) { printf("couldn't add the address provided by the DHCP server\n"); @@ -499,16 +481,7 @@ int do_dhcp(const char *iface) "couldn't add the default route provided by the DHCP server\n"); goto cleanup; } - set_mtu(nl_sock, iface_index, mtu); - } else { - /* Clean up: we're clearly too cool for IPv4 */ - if (mod_route4(nl_sock, iface_index, RTM_DELROUTE, temp_gw) != 0) { - printf("couldn't remove temporary route\n"); - } - if (mod_addr4(nl_sock, iface_index, RTM_DELADDR, temp_addr, 16) != 0) { - printf("couldn't remove temporary address\n"); - } } ret = 0; From f7a16880b8c5ee1f9d9cae5b1eaadd7c44befbab Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Mon, 23 Mar 2026 14:42:46 +0100 Subject: [PATCH 3/3] init: Implement fallback for DHCP servers without Rapid Commit When a server answers DHCPDISCOVER with DHCPOFFER instead of an immediate ACK, send DHCPREQUEST for the offered address and wait for the final ACK. This makes DHCP work on macOS hosts when using gvproxy for networking.
Signed-off-by: Matej Hrica --- init/dhcp.c | 285 +++++++++++++++++++++++++++++++++++++--------------- init/dhcp.h | 10 +- 2 files changed, 210 insertions(+), 85 deletions(-) diff --git a/init/dhcp.c b/init/dhcp.c index 785c4e36d..e852bded8 100644 --- a/init/dhcp.c +++ b/init/dhcp.c @@ -26,6 +26,8 @@ #include #define DHCP_BUFFER_SIZE 576 +#define DHCP_MSG_OFFER 2 +#define DHCP_MSG_ACK 5 /* Helper function to send netlink message */ static int nl_send(int sock, struct nlmsghdr *nlh) @@ -255,6 +257,143 @@ static unsigned char count_leading_ones(uint32_t val) return count; } +/* Return the DHCP message type (option 53) from a response, or 0 */ +static unsigned char get_dhcp_msg_type(const unsigned char *response, + ssize_t len) +{ + /* Walk DHCP options (TLV chain starting after the magic cookie) */ + size_t p = 240; + while (p < (size_t)len) { + unsigned char opt = response[p]; + + if (opt == 0xff) /* end */ + break; + if (opt == 0) { /* padding */ + p++; + continue; + } + + if (p + 1 >= (size_t)len) + break; + + unsigned char opt_len = response[p + 1]; + p += 2; + + if (p + opt_len > (size_t)len) + break; + if (opt == 53 && opt_len >= 1) /* Message Type */ + return response[p]; + + p += opt_len; + } + return 0; +} + +/* Parse a DHCP ACK and configure the interface. Returns 0 or -1 on error. 
*/ +static int handle_dhcp_ack(int nl_sock, int iface_index, + const unsigned char *response, ssize_t len) +{ + /* Need at least 240 bytes (DHCP header + magic cookie) + 1 for options */ + if (len < 241) { + printf("DHCPACK too short (%zd bytes)\n", len); + return -1; + } + + /* Parse DHCP response */ + struct in_addr addr; + /* yiaddr is at offset 16-19 in network byte order */ + memcpy(&addr.s_addr, &response[16], sizeof(addr.s_addr)); + + if (addr.s_addr == INADDR_ANY) { + printf("DHCPACK has no address (yiaddr is 0.0.0.0)\n"); + return -1; + } + + struct in_addr netmask = {.s_addr = INADDR_ANY}; + struct in_addr router = {.s_addr = INADDR_ANY}; + /* Clamp MTU to passt's limit */ + uint16_t mtu = 65520; + + FILE *resolv = fopen("/etc/resolv.conf", "w"); + if (!resolv) { + perror("Failed to open /etc/resolv.conf"); + } + + /* Parse DHCP options (start at offset 240 after magic cookie) */ + size_t p = 240; + while (p < (size_t)len) { + unsigned char opt = response[p]; + + if (opt == 0xff) { + /* Option 255: End (of options) */ + break; + } + + if (opt == 0) { /* Padding */ + p++; + continue; + } + + if (p + 1 >= (size_t)len) + break; + + unsigned char opt_len = response[p + 1]; + p += 2; /* Length doesn't include code and length field itself */ + + if (p + opt_len > (size_t)len) { + /* Malformed packet, option length exceeds packet boundary */ + break; + } + + if (opt == 1 && opt_len >= 4) { + /* Option 1: Subnet Mask */ + memcpy(&netmask.s_addr, &response[p], sizeof(netmask.s_addr)); + } else if (opt == 3 && opt_len >= 4) { + /* Option 3: Router */ + memcpy(&router.s_addr, &response[p], sizeof(router.s_addr)); + } else if (opt == 6 && opt_len >= 4) { + /* Option 6: Domain Name Server */ + if (resolv) { + for (int dns_p = p; dns_p + 4 <= p + opt_len; dns_p += 4) { + fprintf(resolv, "nameserver %d.%d.%d.%d\n", response[dns_p], + response[dns_p + 1], response[dns_p + 2], + response[dns_p + 3]); + } + } + } else if (opt == 26 && opt_len >= 2) { + /* Option 26: 
Interface MTU */ + mtu = (response[p] << 8) | response[p + 1]; + + /* We don't know yet if IPv6 is available: don't go below 1280 B + */ + if (mtu < 1280) + mtu = 1280; + if (mtu > 65520) + mtu = 65520; + } + + p += opt_len; + } + + if (resolv) { + fclose(resolv); + } + + /* Calculate prefix length from netmask */ + unsigned char prefix_len = count_leading_ones(ntohl(netmask.s_addr)); + + if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, addr, prefix_len) != 0) { + printf("couldn't add the address provided by the DHCP server\n"); + return -1; + } + if (mod_route4(nl_sock, iface_index, RTM_NEWROUTE, router) != 0) { + printf("couldn't add the default route provided by the DHCP server\n"); + return -1; + } + set_mtu(nl_sock, iface_index, mtu); + return 0; +} + /* Send DISCOVER with Rapid Commit, process ACK, configure address and route */ int do_dhcp(const char *iface) { @@ -386,106 +525,90 @@ int do_dhcp(const char *iface) goto cleanup; } - /* Get and process response (DHCPACK) if any */ + /* Get response: DHCPACK (Rapid Commit) or DHCPOFFER */ struct sockaddr_in from_addr; socklen_t from_len = sizeof(from_addr); ssize_t len = recvfrom(sock, response, sizeof(response), 0, (struct sockaddr *)&from_addr, &from_len); - close(sock); - sock = -1; + if (len <= 0) + goto done; /* No DHCP response — not an error, VM may be IPv6-only */ - if (len > 0) { - /* Parse DHCP response */ - struct in_addr addr; - /* yiaddr is at offset 16-19 in network byte order */ - memcpy(&addr.s_addr, &response[16], sizeof(addr.s_addr)); + unsigned char msg_type = get_dhcp_msg_type(response, len); - struct in_addr netmask = {.s_addr = INADDR_ANY}; - struct in_addr router = {.s_addr = INADDR_ANY}; - /* Clamp MTU to passt's limit */ - uint16_t mtu = 65520; - - FILE *resolv = fopen("/etc/resolv.conf", "w"); - if (!resolv) { - perror("Failed to open /etc/resolv.conf"); + if (msg_type == DHCP_MSG_ACK) { + /* Rapid Commit — server sent ACK directly */ + close(sock); + sock = -1; + if 
(handle_dhcp_ack(nl_sock, iface_index, response, len) != 0) + goto cleanup; + } else if (msg_type == DHCP_MSG_OFFER) { + /* + * DHCPOFFER — complete the 4-way handshake by sending DHCPREQUEST + * and waiting for DHCPACK. Servers without Rapid Commit (e.g. + * gvproxy) require this. + */ + struct in_addr offered_addr; + memcpy(&offered_addr.s_addr, &response[16], + sizeof(offered_addr.s_addr)); + + /* Build DHCPREQUEST */ + memset(request.options, 0, sizeof(request.options)); + opt_offset = 0; + + /* Option 53: DHCP Message Type = REQUEST (3) */ + request.options[opt_offset++] = 53; + request.options[opt_offset++] = 1; + request.options[opt_offset++] = 3; + + /* Option 50: Requested IP Address */ + request.options[opt_offset++] = 50; + request.options[opt_offset++] = 4; + memcpy(&request.options[opt_offset], &offered_addr.s_addr, 4); + opt_offset += 4; + + /* Option 54: Server Identifier (from_addr) */ + request.options[opt_offset++] = 54; + request.options[opt_offset++] = 4; + memcpy(&request.options[opt_offset], &from_addr.sin_addr.s_addr, 4); + opt_offset += 4; + + /* Option 255: End */ + request.options[opt_offset++] = 0xff; + + if (sendto(sock, &request, sizeof(request), 0, + (struct sockaddr *)&dest_addr, sizeof(dest_addr)) < 0) { + perror("sendto DHCPREQUEST failed"); + goto cleanup; } - /* Parse DHCP options (start at offset 240 after magic cookie) */ - size_t p = 240; - while (p < (size_t)len) { - unsigned char opt = response[p]; + from_len = sizeof(from_addr); + len = recvfrom(sock, response, sizeof(response), 0, + (struct sockaddr *)&from_addr, &from_len); - if (opt == 0xff) { - /* Option 255: End (of options) */ - break; - } - - if (opt == 0) { /* Padding */ - p++; - continue; - } - - unsigned char opt_len = response[p + 1]; - p += 2; /* Length doesn't include code and length field itself */ - - if (p + opt_len > (size_t)len) { - /* Malformed packet, option length exceeds packet boundary */ - break; - } - - if (opt == 1) { - /* Option 1: Subnet Mask */ - 
memcpy(&netmask.s_addr, &response[p], sizeof(netmask.s_addr)); - } else if (opt == 3) { - /* Option 3: Router */ - memcpy(&router.s_addr, &response[p], sizeof(router.s_addr)); - } else if (opt == 6) { - /* Option 6: Domain Name Server */ - if (resolv) { - for (int dns_p = p; dns_p + 3 < p + opt_len; dns_p += 4) { - fprintf(resolv, "nameserver %d.%d.%d.%d\n", - response[dns_p], response[dns_p + 1], - response[dns_p + 2], response[dns_p + 3]); - } - } - } else if (opt == 26) { - /* Option 26: Interface MTU */ - mtu = (response[p] << 8) | response[p + 1]; - - /* We don't know yet if IPv6 is available: don't go below 1280 B - */ - if (mtu < 1280) - mtu = 1280; - if (mtu > 65520) - mtu = 65520; - } - - p += opt_len; - } + close(sock); + sock = -1; - if (resolv) { - fclose(resolv); + if (len <= 0) { + printf("no DHCPACK received\n"); + goto cleanup; } - /* Calculate prefix length from netmask */ - unsigned char prefix_len = count_leading_ones(ntohl(netmask.s_addr)); - - if (mod_addr4(nl_sock, iface_index, RTM_NEWADDR, addr, prefix_len) != - 0) { - printf("couldn't add the address provided by the DHCP server\n"); + if (get_dhcp_msg_type(response, len) != DHCP_MSG_ACK) { + printf("expected DHCPACK but got message type %d\n", + get_dhcp_msg_type(response, len)); goto cleanup; } - if (mod_route4(nl_sock, iface_index, RTM_NEWROUTE, router) != 0) { - printf( - "couldn't add the default route provided by the DHCP server\n"); + + if (handle_dhcp_ack(nl_sock, iface_index, response, len) != 0) goto cleanup; - } - set_mtu(nl_sock, iface_index, mtu); + } else { + printf("unexpected DHCP message type %d\n", msg_type); + goto cleanup; } +done: ret = 0; - cleanup: if (sock >= 0) { close(sock); diff --git a/init/dhcp.h b/init/dhcp.h index 2a4abfb1a..39e20ead7 100644 --- a/init/dhcp.h +++ b/init/dhcp.h @@ -37,15 +37,17 @@ struct dhcp_packet { * Perform DHCP discovery and configuration for a network interface * * This function: - * 1. 
Sets up a temporary link-local address (169.254.1.1/16) + * 1. Binds a UDP socket to the interface using SO_BINDTODEVICE * 2. Sends a DHCP DISCOVER message with Rapid Commit option - * 3. Waits up to 100ms for a DHCP ACK response - * 4. Parses the response and configures: + * 3. Waits up to 100ms for a response: + * - If DHCPACK (Rapid Commit): applies configuration directly + * - If DHCPOFFER: sends DHCPREQUEST and waits for DHCPACK + * - If no response: returns success (VM may be IPv6-only) + * 4. Parses the ACK and configures: * - IPv4 address with appropriate prefix length * - Default gateway route * - DNS servers (overwriting /etc/resolv.conf) * - Interface MTU - * 5. Cleans up temporary configuration * * Parameters: * iface - The name of the network interface to be configured.