From 589e4eda02ef383a8fb2de39633b6d4ac8f82220 Mon Sep 17 00:00:00 2001 From: Steven Fairchild Date: Wed, 28 Jun 2023 10:13:28 -0400 Subject: [PATCH] Fix resolv.conf during Azure host servicing If the network interfaces are brought up/down without restarting NetworkManager, our resolv.conf will be overwritten by NetworkManager. This can occur during [Azure Host Servicing](https://learn.microsoft.com/en-us/azure/developer/intro/hosting-apps-on-azure) events. Additional information is written to the system logs to assist with SRE troubleshooting. By using NetworkManager dispatcher scripts, we can restart dnsmasq when certain events happen. --- pkg/operator/controllers/dnsmasq/dnsmasq.go | 72 +++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/pkg/operator/controllers/dnsmasq/dnsmasq.go b/pkg/operator/controllers/dnsmasq/dnsmasq.go index 9182fb7c040..c28d6ab832a 100644 --- a/pkg/operator/controllers/dnsmasq/dnsmasq.go +++ b/pkg/operator/controllers/dnsmasq/dnsmasq.go @@ -110,6 +110,46 @@ chmod 0744 /etc/resolv.conf.dnsmasq /bin/rm $TMPNETRESOLV /bin/rm $TMPSELFRESOLV {{ end }} + +{{ define "99-dnsmasq-restart" }} +#!/bin/sh +# This is a NetworkManager dispatcher script to restart dnsmasq +# in the event of a network interface change (e. g. 
host servicing event https://learn.microsoft.com/en-us/azure/developer/intro/hosting-apps-on-azure) +# this will restart dnsmasq, reapplying our /etc/resolv.conf file and overwriting any modifications made by NetworkManager + +interface=$1 +action=$2 + +log() { + logger -i "$0" -t '99-DNSMASQ-RESTART SCRIPT' "$@" +} + +if [[ $interface == eth* && $action == "up" ]] || [[ $interface == eth* && $action == "down" ]] || [[ $interface == enP* && $action == "up" ]] || [[ $interface == enP* && $action == "down" ]]; then + log "$action happened on $interface, connection state is now $CONNECTIVITY_STATE" + log "restarting dnsmasq now" + if systemctl restart dnsmasq; then + log "dnsmasq successfully restarted" + else + log "failed to restart dnsmasq" + fi + + # log dns configuration information relevant to SRE while troubleshooting + # The line break used here is important for formatting + log "/etc/resolv.conf contents + + $(cat /etc/resolv.conf)" + + log "$(echo -n "/etc/resolv.conf file metadata: ") $(ls -lZ /etc/resolv.conf)" + + log "/etc/resolv.conf.dnsmasq contents + + $(cat /etc/resolv.conf.dnsmasq)" + + log "$(echo -n "/etc/resolv.conf.dnsmasq file metadata: ") $(ls -lZ /etc/resolv.conf.dnsmasq)" +fi + +exit 0 +{{ end }} `)) func config(clusterDomain, apiIntIP, ingressIP string, gatewayDomains []string, gatewayPrivateEndpointIP string) ([]byte, error) { @@ -157,6 +197,17 @@ func startpre() ([]byte, error) { return buf.Bytes(), nil } +func nmDispatcherRestartDnsmasq() ([]byte, error) { + buf := &bytes.Buffer{} + + err := t.ExecuteTemplate(buf, "99-dnsmasq-restart", nil) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + func ignition2Config(clusterDomain, apiIntIP, ingressIP string, gatewayDomains []string, gatewayPrivateEndpointIP string) (*ign2types.Config, error) { service, err := service() if err != nil { @@ -173,6 +224,11 @@ func ignition2Config(clusterDomain, apiIntIP, ingressIP string, gatewayDomains [ return nil, err } + 
nmDispatcherRestartDnsmasq, err := nmDispatcherRestartDnsmasq() + if err != nil { + return nil, err + } + return &ign2types.Config{ Ignition: ign2types.Ignition{ Version: ign2types.MaxVersion.String(), @@ -211,6 +267,22 @@ func ignition2Config(clusterDomain, apiIntIP, ingressIP string, gatewayDomains [ Mode: ignutil.IntToPtr(0744), }, }, + { + Node: ign2types.Node{ + Filesystem: "root", + Overwrite: ignutil.BoolToPtr(true), + Path: "/etc/NetworkManager/dispatcher.d/99-dnsmasq-restart", + User: &ign2types.NodeUser{ + Name: *ignutil.StrToPtr("root"), + }, + }, + FileEmbedded1: ign2types.FileEmbedded1{ + Contents: ign2types.FileContents{ + Source: *ignutil.StrToPtr(dataurl.EncodeBytes(nmDispatcherRestartDnsmasq)), + }, + Mode: ignutil.IntToPtr(0744), + }, + }, }, }, Systemd: ign2types.Systemd{