From 4d65cd247a6966ac15f273ac7568e1451e087f3a Mon Sep 17 00:00:00 2001 From: Kroese Date: Tue, 11 Jun 2024 01:46:34 +0200 Subject: [PATCH] feat: Implement usermode networking (#109) --- readme.md | 2 +- src/boot.sh | 8 ++-- src/disk.sh | 29 +++++------ src/install.sh | 1 + src/network.sh | 127 +++++++++++++++++++++++++++++++++++++------------ 5 files changed, 114 insertions(+), 53 deletions(-) diff --git a/readme.md b/readme.md index 7cd9f12..8d5948b 100644 --- a/readme.md +++ b/readme.md @@ -15,7 +15,7 @@ Docker container for running ARM-based virtual machines using QEMU, for devices It allows you to create VM's which behave just like normal containers, meaning you can manage them using all your existing tools (like Portainer) and configure them in a language (YAML) you are already familiar with. -This greatly reduces the learning curve and also eliminates the need for a dedicated Proxmox or ESXi server in many cases. +This greatly reduces the learning curve and eliminates the need for a dedicated Proxmox or ESXi server in many cases. It uses high-performance QEMU options (like KVM acceleration, kernel-mode networking, IO threading, etc.) to achieve near-native speed. diff --git a/src/boot.sh b/src/boot.sh index 0a6265e..6616a6d 100644 --- a/src/boot.sh +++ b/src/boot.sh @@ -10,22 +10,22 @@ BOOT_DESC="" DIR="/usr/share/qemu" case "${BOOT_MODE,,}" in - uefi) + "uefi" ) ROM="AAVMF_CODE.no-secboot.fd" VARS="AAVMF_VARS.fd" ;; - secure) + "secure" ) SECURE="on" BOOT_DESC=" securely" ROM="AAVMF_CODE.secboot.fd" VARS="AAVMF_VARS.fd" ;; - windows) + "windows" ) ROM="AAVMF_CODE.no-secboot.fd" VARS="AAVMF_VARS.fd" BOOT_OPTS="-rtc base=localtime" ;; - windows_secure) + "windows_secure" ) SECURE="on" BOOT_DESC=" securely" ROM="AAVMF_CODE.ms.fd" diff --git a/src/disk.sh b/src/disk.sh index 19b87b0..c66eb17 100644 --- a/src/disk.sh +++ b/src/disk.sh @@ -532,19 +532,11 @@ case "${DISK_TYPE,,}" in esac if [ -z "${MEDIA_TYPE:-}" ]; then - case "${DISK_TYPE,,}" in - "ide" | "usb" | "scsi" ) - if [[ "${MACHINE,,}" == "virt" ]]; then - MEDIA_TYPE="auto" - else - MEDIA_TYPE="$DISK_TYPE" - fi ;; - "blk" | "auto" ) - if [[ "${MACHINE,,}" != "virt" ]] && [[ "${MACHINE,,}" != "pc-i440fx-2"* ]]; then - MEDIA_TYPE="ide" - else - MEDIA_TYPE="auto" - fi ;; + case "${MACHINE,,}" in + "virt" | "pc-q35-2"* | "pc-i440fx-2"* ) + MEDIA_TYPE="auto" ;; + * ) + [[ "${DISK_TYPE,,}" != "blk" ]] && MEDIA_TYPE="$DISK_TYPE" || MEDIA_TYPE="ide" ;; esac fi @@ -561,11 +553,12 @@ DRIVERS="/drivers.iso" [ ! -f "$DRIVERS" ] || [ ! -s "$DRIVERS" ] && DRIVERS="$STORAGE/drivers.iso" if [ -f "$DRIVERS" ] && [ -s "$DRIVERS" ]; then - if [[ "${MACHINE,,}" != "virt" ]] && [[ "${MACHINE,,}" != "pc-i440fx-2"* ]]; then - DRIVER_TYPE="ide" - else - DRIVER_TYPE="auto" - fi + case "${MACHINE,,}" in + "virt" | "pc-q35-2"* | "pc-i440fx-2"* ) + DRIVER_TYPE="auto" ;; + * ) + DRIVER_TYPE="ide" ;; + esac DISK_OPTS+=$(addMedia "$DRIVERS" "$DRIVER_TYPE" "1" "" "0x6") fi diff --git a/src/install.sh b/src/install.sh index b520257..f56d437 100644 --- a/src/install.sh +++ b/src/install.sh @@ -46,6 +46,7 @@ info "$msg..." && html "$msg..." fKill "progress.sh" msg="Failed to download $BOOT" +(( rc == 3 )) && error "$msg , cannot write file (disk full?)" && exit 60 (( rc == 4 )) && error "$msg , network failure!" && exit 60 (( rc == 8 )) && error "$msg , server issued an error response!" && exit 60 (( rc != 0 )) && error "$msg , reason: $rc" && exit 60 diff --git a/src/network.sh b/src/network.sh index f96619b..e1a6e77 100644 --- a/src/network.sh +++ b/src/network.sh @@ -7,6 +7,7 @@ set -Eeuo pipefail : "${DHCP:="N"}" : "${NETWORK:="Y"}" : "${HOST_PORTS:=""}" +: "${USER_PORTS:=""}" : "${VM_NET_DEV:=""}" : "${VM_NET_TAP:="qemu"}" @@ -38,7 +39,7 @@ configureDHCP() { if (( rc != 0 )); then error "Cannot create macvtap interface. Please make sure that the network type is 'macvlan' and not 'ipvlan'," - error "that your kernel is recent (>4) and supports it, and that the container has the NET_ADMIN capability set." && exit 16 + error "that your kernel is recent (>4) and supports it, and that the container has the NET_ADMIN capability set." && return 1 fi while ! ip link set "$VM_NET_TAP" up; do @@ -52,28 +53,28 @@ configureDHCP() { # Create dev file (there is no udev in container: need to be done manually) IFS=: read -r MAJOR MINOR < <(cat /sys/devices/virtual/net/"$VM_NET_TAP"/tap*/dev) - (( MAJOR < 1)) && error "Cannot find: sys/devices/virtual/net/$VM_NET_TAP" && exit 18 + (( MAJOR < 1)) && error "Cannot find: sys/devices/virtual/net/$VM_NET_TAP" && return 1 [[ ! -e "$TAP_PATH" ]] && [[ -e "/dev0/${TAP_PATH##*/}" ]] && ln -s "/dev0/${TAP_PATH##*/}" "$TAP_PATH" if [[ ! -e "$TAP_PATH" ]]; then { mknod "$TAP_PATH" c "$MAJOR" "$MINOR" ; rc=$?; } || : - (( rc != 0 )) && error "Cannot mknod: $TAP_PATH ($rc)" && exit 20 + (( rc != 0 )) && error "Cannot mknod: $TAP_PATH ($rc)" && return 1 fi { exec 30>>"$TAP_PATH"; rc=$?; } 2>/dev/null || : if (( rc != 0 )); then - error "Cannot create TAP interface ($rc). $ADD_ERR --device-cgroup-rule='c *:* rwm'" && exit 21 + error "Cannot create TAP interface ($rc). $ADD_ERR --device-cgroup-rule='c *:* rwm'" && return 1 fi { exec 40>>/dev/vhost-net; rc=$?; } 2>/dev/null || : if (( rc != 0 )); then - error "VHOST can not be found ($rc). $ADD_ERR --device=/dev/vhost-net" && exit 22 + error "VHOST can not be found ($rc). $ADD_ERR --device=/dev/vhost-net" && return 1 fi - NET_OPTS="-netdev tap,id=hostnet0,vhost=on,vhostfd=40,fd=30" + NET_OPTS="-netdev tap,id=hostnet0,vhost=on,vhostfd=40,fd=30,script=no,downscript=no" return 0 } @@ -96,13 +97,34 @@ configureDNS() { DNSMASQ_OPTS=$(echo "$DNSMASQ_OPTS" | sed 's/\t/ /g' | tr -s ' ' | sed 's/^ *//') if ! $DNSMASQ ${DNSMASQ_OPTS:+ $DNSMASQ_OPTS}; then - error "Failed to start dnsmasq, reason: $?" && exit 29 + error "Failed to start dnsmasq, reason: $?" && return 1 fi return 0 } -getPorts() { +getUserPorts() { + + local args="" + local list=$1 + local ssh="22" + local rdp="3389" + + [ -z "$list" ] && list="$ssh,$rdp" || list+=",$ssh,$rdp" + + list="${list/,/ }" + list="${list## }" + list="${list%% }" + + for port in $list; do + args+="hostfwd=tcp::$port-$VM_NET_IP:$port," + done + + echo "${args%?}" + return 0 +} + +getHostPorts() { local list=$1 local vnc="5900" @@ -125,6 +147,17 @@ getPorts() { return 0 } +configureUser() { + + NET_OPTS="-netdev user,id=hostnet0,host=${VM_NET_IP%.*}.1,net=${VM_NET_IP%.*}.0/24,dhcpstart=$VM_NET_IP,hostname=$VM_NET_HOST" + + local forward + forward=$(getUserPorts "$USER_PORTS") + [ -n "$forward" ] && NET_OPTS+=",$forward" + + return 0 +} + configureNAT() { # Create the necessary file structure for /dev/net/tun @@ -136,14 +169,14 @@ configureNAT() { fi if [ ! -c /dev/net/tun ]; then - error "TUN device missing. $ADD_ERR --device /dev/net/tun --cap-add NET_ADMIN" && exit 25 + error "TUN device missing. $ADD_ERR --device /dev/net/tun --cap-add NET_ADMIN" && return 1 fi # Check port forwarding flag if [[ $(< /proc/sys/net/ipv4/ip_forward) -eq 0 ]]; then { sysctl -w net.ipv4.ip_forward=1 > /dev/null; rc=$?; } || : if (( rc != 0 )) || [[ $(< /proc/sys/net/ipv4/ip_forward) -eq 0 ]]; then - error "IP forwarding is disabled. $ADD_ERR --sysctl net.ipv4.ip_forward=1" && exit 24 + error "IP forwarding is disabled. $ADD_ERR --sysctl net.ipv4.ip_forward=1" && return 1 fi fi @@ -155,10 +188,12 @@ configureNAT() { { ip link add dev dockerbridge type bridge ; rc=$?; } || : if (( rc != 0 )); then - error "Failed to create bridge. $ADD_ERR --cap-add NET_ADMIN" && exit 23 + error "Failed to create bridge. $ADD_ERR --cap-add NET_ADMIN" && return 1 fi - ip address add "${VM_NET_IP%.*}.1/24" broadcast "${VM_NET_IP%.*}.255" dev dockerbridge + if ! ip address add "${VM_NET_IP%.*}.1/24" broadcast "${VM_NET_IP%.*}.255" dev dockerbridge; then + error "Failed to add IP address!" && return 1 + fi while ! ip link set dockerbridge up; do info "Waiting for IP address to become available..." @@ -167,7 +202,7 @@ configureNAT() { # QEMU Works with taps, set tap to the bridge created if ! ip tuntap add dev "$VM_NET_TAP" mode tap; then - error "$tuntap" && exit 31 + error "$tuntap" && return 1 fi while ! ip link set "$VM_NET_TAP" up promisc on; do @@ -175,35 +210,44 @@ configureNAT() { sleep 2 done - ip link set dev "$VM_NET_TAP" master dockerbridge + if ! ip link set dev "$VM_NET_TAP" master dockerbridge; then + error "Failed to set IP link!" && return 1 + fi # Add internet connection to the VM update-alternatives --set iptables /usr/sbin/iptables-legacy > /dev/null update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy > /dev/null - exclude=$(getPorts "$HOST_PORTS") + exclude=$(getHostPorts "$HOST_PORTS") if ! iptables -t nat -A POSTROUTING -o "$VM_NET_DEV" -j MASQUERADE; then - error "$tables" && exit 30 + error "$tables" && return 1 fi # shellcheck disable=SC2086 - iptables -t nat -A PREROUTING -i "$VM_NET_DEV" -d "$IP" -p tcp${exclude} -j DNAT --to "$VM_NET_IP" - iptables -t nat -A PREROUTING -i "$VM_NET_DEV" -d "$IP" -p udp -j DNAT --to "$VM_NET_IP" + if ! iptables -t nat -A PREROUTING -i "$VM_NET_DEV" -d "$IP" -p tcp${exclude} -j DNAT --to "$VM_NET_IP"; then + error "Failed to configure IP tables!" && return 1 + fi + + if ! iptables -t nat -A PREROUTING -i "$VM_NET_DEV" -d "$IP" -p udp -j DNAT --to "$VM_NET_IP"; then + error "Failed to configure IP tables!" && return 1 + fi if (( KERNEL > 4 )); then # Hack for guest VMs complaining about "bad udp checksums in 5 packets" iptables -A POSTROUTING -t mangle -p udp --dport bootpc -j CHECKSUM --checksum-fill > /dev/null 2>&1 || true fi - NET_OPTS="-netdev tap,ifname=$VM_NET_TAP,script=no,downscript=no,id=hostnet0" + NET_OPTS="-netdev tap,id=hostnet0,ifname=$VM_NET_TAP" if [ -c /dev/vhost-net ]; then { exec 40>>/dev/vhost-net; rc=$?; } 2>/dev/null || : (( rc == 0 )) && NET_OPTS+=",vhost=on,vhostfd=40" fi - configureDNS + NET_OPTS+=",script=no,downscript=no" + + ! configureDNS && return 1 return 0 } @@ -214,7 +258,7 @@ closeNetwork() { nginx -s stop 2> /dev/null fWait "nginx" - [[ "$NETWORK" != [Yy1]* ]] && return 0 + [[ "$NETWORK" == [Nn]* ]] && return 0 exec 30<&- || true exec 40<&- || true @@ -229,6 +273,8 @@ closeNetwork() { local pid="/var/run/dnsmasq.pid" [ -s "$pid" ] && pKill "$(<"$pid")" + [[ "${NETWORK,,}" == "user"* ]] && return 0 + ip link set "$VM_NET_TAP" down promisc off || true ip link delete "$VM_NET_TAP" || true @@ -250,8 +296,7 @@ checkOS() { [[ "${name,,}" == *"microsoft"* ]] && os="Windows" if [ -n "$os" ]; then - error "You are using Docker Desktop for $os which does not support macvlan, please revert to bridge networking!" - return 1 + warn "you are using Docker Desktop for $os which does not support macvlan, please revert to bridge networking!" fi return 0 @@ -307,7 +352,7 @@ getInfo() { # Configure Network # ###################################### -if [[ "$NETWORK" != [Yy1]* ]]; then +if [[ "$NETWORK" == [Nn]* ]]; then NET_OPTS="" return 0 fi @@ -323,23 +368,45 @@ fi if [[ "$DHCP" == [Yy1]* ]]; then - ! checkOS && [[ "$DEBUG" != [Yy1]* ]] && exit 19 + checkOS if [[ "$IP" == "172."* ]]; then warn "container IP starts with 172.* which is often a sign that you are not on a macvlan network (required for DHCP)!" fi - # Configuration for DHCP IP - configureDHCP + # Configure for macvtap interface + ! configureDHCP && exit 20 else if [[ "$IP" != "172."* ]] && [[ "$IP" != "10.8"* ]] && [[ "$IP" != "10.9"* ]]; then - ! checkOS && [[ "$DEBUG" != [Yy1]* ]] && exit 19 + checkOS fi - # Configuration for static IP - configureNAT + if [[ "${NETWORK,,}" != "user"* ]]; then + + # Configure for tap interface + if ! configureNAT; then + + NETWORK="user" + warn "falling back to usermode networking (slow)!" + + ip link set "$VM_NET_TAP" down promisc off &> null || true + ip link delete "$VM_NET_TAP" &> null || true + + ip link set dockerbridge down &> null || true + ip link delete dockerbridge &> null || true + + fi + + fi + + if [[ "${NETWORK,,}" == "user"* ]]; then + + # Configure for usermode networking (slirp) + ! configureUser && exit 24 + + fi fi