Unix & Linux Asked by Philip Couling on December 29, 2021
I’ve just found a bug in an embedded Linux machine caused by a broken /etc/resolv.conf. Specifically, this was a broken symlink. The thing I can’t explain is why some (many) of them ever worked.
The code is running inside an Alpine Linux chroot. The Debian "host" has a present and correct resolv.conf, but the link inside the chroot environment was broken. I’ve also discovered that Alpine does not, by default, come with an nsswitch.conf. All hosts have the same Alpine Linux chroot environment; they differ only by host version of Debian and network (physical location).
The host and chroot environment share only /run, /sys, /proc and /dev.
Despite this, some devices could resolve external FQDNs. I had thought this might be related to a shared DNS cache, but have ruled this out. From inside the chroot:
$ ping unix.stackexchange.com
PING unix.stackexchange.com (151.101.193.69): 56 data bytes
64 bytes from 151.101.193.69: seq=0 ttl=58 time=3.962 ms
There is no chance that these embedded devices have been asked to resolve unix.stackexchange.com before, so it should not be in their DNS cache.
We are currently investigating to see if the difference between "working" and "broken" devices is the network they are on or subtle differences in release versions. I’ll update the question if we can get to the bottom of that detail.
Can anyone explain why some of these devices might be resolving FQDNs without a valid resolv.conf?
Additional details on the chroot
apk add
.nsswitch.conf
and only a very broken link for /etc/resolv.conf
Additional details on the host:
Alpine Linux uses Busybox and uClibc rather than the better known glibc. There's a quirk in uClibc's implementation of getaddrinfo() where it will send DNS requests to localhost:53 when it can find no other name servers in /etc/resolv.conf.
This explains why some units were performing this way and not others despite having identical chroot environments.
It seems dnsmasq was installed on some hosts and not others, presumably due to the changing decisions of the BeagleBone developers.
The trace for ping -c1 unix.stackexchange.com below shows Alpine / Busybox / ping attempting to open /etc/resolv.conf, and when that fails it sends a request to 127.0.0.1:53. It then gets a response back from that address.
execve("/bin/ping", ["ping", "-c1", "unix.stackexchange.com"], 0xbec18768 /* 20 vars */) = 0
set_tls(0xb6f215ec) = 0
set_tid_address(0xb6f221a0) = 18047
mprotect(0x536000, 8192, PROT_READ) = 0
getuid32() = 0
getpid() = 18047
open("/etc/hosts", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
fcntl64(3, F_SETFD, FD_CLOEXEC) = 0
read(3, "127.0.0.1\tlocalhost localhost.lo"..., 1024) = 79
read(3, "", 1024) = 0
close(3) = 0
open("/etc/resolv.conf", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
clock_gettime(CLOCK_REALTIME, {tv_sec=1595644595, tv_nsec=840201586}) = 0
clock_gettime(CLOCK_REALTIME, {tv_sec=1595644595, tv_nsec=840992292}) = 0
socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_IP) = 3
bind(3, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
clock_gettime(CLOCK_REALTIME, {tv_sec=1595644595, tv_nsec=843970581}) = 0
sendto(3, "2472061 1 4unixrstackexchange3"..., 40, MSG_NOSIGNAL, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")}, 16) = 40
sendto(3, "270D1 1 4unixrstackexchange3"..., 40, MSG_NOSIGNAL, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")}, 16) = 40
poll([{fd=3, events=POLLIN}], 1, 2500) = 1 ([{fd=3, revents=POLLIN}])
recvfrom(3, "247206201200 1 4 4unixrstackexchange3"..., 512, 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")}, [16]) = 104
recvfrom(3, "270D201200 1 1 4unixrstackexchange3"..., 512, 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")}, [16]) = 124
close(3) = 0
ioctl(1, TIOCGWINSZ, {ws_row=38, ws_col=178, ws_xpixel=0, ws_ypixel=0}) = 0
writev(1, [{iov_base="PING unix.stackexchange.com (151"..., iov_len=47}, {iov_base=" data bytes\n", iov_len=12}], 2PING unix.stackexchange.com (151.101.65.69): 56 data bytes
) = 59
socket(AF_INET, SOCK_RAW, IPPROTO_ICMP) = 3
dup2(3, 0) = 0
close(3) = 0
setsockopt(0, SOL_SOCKET, SO_BROADCAST, [1], 4) = 0
setsockopt(0, SOL_SOCKET, SO_RCVBUF, [7280], 4) = 0
rt_sigprocmask(SIG_UNBLOCK, [RT_1 RT_2], NULL, 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x475b3c, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0xb6ec89fc}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
clock_gettime(CLOCK_MONOTONIC, {tv_sec=4280439, tv_nsec=868933133}) = 0
sendto(0, "10 Zr177F 51631236 "..., 64, 0, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("151.101.65.69")}, 28) = 64
rt_sigaction(SIGALRM, {sa_handler=0x475b3c, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0xb6ec89fc}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
setitimer(ITIMER_REAL, {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=10, tv_usec=0}}, {it_interval={tv_sec=0, tv_usec=0}, it_value={tv_sec=0, tv_usec=0}}) = 0
recvfrom(0, "E T8<