slirp4netns — How does it work

M Castelino
3 min readApr 11, 2019

--

slirp4netns provides user-mode networking (“slirp”) for unprivileged network namespaces. It is heavily used by rootless containers.

Let us examine how it actually works. You can do this without reading the code (though you really should read it) by using some simple Linux tools. Along the way you will be exposed to some really nice Linux features you may not be aware of.

Create a process with its own user, network and mount namespaces

$ unshare --user --map-root-user --net --mount
[root@incensed-gawain ~]# echo $$
2646

Run the slirp process on the host and connect it to the process namespace

strace the process to examine what happens

strace -f slirp4netns --configure --mtu=65520 2646 tap0

It creates a socketpair and clones into the child

The fd is still available in the child process and accessible across the network namespace boundary

Wait for the child to communicate back on the socketpair

socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f5bc798b810) = 2667
wait4(2667, strace: Process 2667 attached
<unfinished ...>

Child

Creates the tap interface

[pid  2667] openat(AT_FDCWD, "/proc/2646/ns/user", O_RDONLY) = 5
[pid 2667] openat(AT_FDCWD, "/proc/2646/ns/net", O_RDONLY) = 6
[pid 2667] setns(5, CLONE_NEWUSER) = 0
[pid 2667] setns(6, CLONE_NEWNET) = 0
[pid 2667] close(5) = 0
[pid 2667] close(6) = 0
[pid 2667] openat(AT_FDCWD, "/dev/net/tun", O_RDWR) = 5

The tap fd in the child is 5

[pid  2667] ioctl(5, TUNSETIFF, 0x7ffd60075390) = 0
[pid 2667] socket(AF_INET, SOCK_DGRAM, IPPROTO_IP) = 6
[pid 2667] ioctl(6, SIOCSIFFLAGS, {ifr_name="tap0", ifr_flags=IFF_UP|IFF_RUNNING}) = 0
[pid 2667] ioctl(6, SIOCSIFMTU, {ifr_name="tap0", ifr_mtu=65520}) = 0
[pid 2667] ioctl(6, SIOCSIFADDR, {ifr_name="tap0", ifr_addr={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("10.0.2.100")}}) = 0
[pid 2667] ioctl(6, SIOCSIFNETMASK, {ifr_name="tap0", ifr_netmask={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("255.255.255.0")}}) = 0
[pid 2667] ioctl(6, SIOCADDRT, 0x7ffd60075390) = 0

Use ancillary data (an SCM_RIGHTS control message, sent alongside the normal data on the socketpair) to send fd 5 back to the parent process running on the host

https://linux.die.net/man/2/sendmsg

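MSG_OOB
Sends out-of-band data on sockets that support this notion (e.g., of type SOCK_STREAM); the underlying protocol must also support out-of-band data

Note that the sendmsg call below does not set MSG_OOB (its flags argument is 0); the fd travels as SCM_RIGHTS ancillary data in msg_control.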
[pid 2667] sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=20, msg_flags=0}, 0) = 1
[pid 2667] write(2, "sent tapfd=5 for tap0\n", 22sent tapfd=5 for tap0
) = 22
[pid 2667] close(4) = 0
[pid 2667] exit_group(0) = ?
[pid 2667] +++ exited with 0 +++

Parent

Picks up fd 5. The parent reads from this fd to get packets from the container.

That is how network traffic makes it across the network ns even though tap interfaces cannot cross a network namespace boundary

<... wait4 resumed> [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2667
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2667, si_uid=1000, si_status=0, si_utime=0, si_stime=0} ---
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=24, msg_flags=0}, 0) = 1
write(2, "received tapfd=5\n", 17received tapfd=5
) = 17
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0
write(1, "Starting slirp\n", 15Starting slirp
) = 15
write(1, "* MTU: 65520\n", 25* MTU: 65520
) = 25
write(1, "* Network: 10.0.2.0\n", 28* Network: 10.0.2.0
) = 28
write(1, "* Netmask: 255.255.255.0"..., 33* Netmask: 255.255.255.0
) = 33
write(1, "* Gateway: 10.0.2.2\n", 28* Gateway: 10.0.2.2
) = 28
write(1, "* DNS: 10.0.2.3\n", 28* DNS: 10.0.2.3
) = 28
write(1, "* Recommended IP: 10.0.2.100\n", 30* Recommended IP: 10.0.2.100
) = 30
write(1, "WARNING: 127.0.0.1:* on the host"..., 127WARNING: 127.0.0.1:* on the host is accessible as 10.0.2.2 (set --disable-host-loopback to prohibit connecting to 127.0.0.1:*)
) = 127
rt_sigaction(SIGPIPE, {sa_handler=SIG_IGN, sa_mask=[PIPE], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f5bc73e2f30}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])
read(5, "33\0\0\0\26\372N1\230}\325\206\335`\0\0\0\0$\0\1\0\0\0\0\0\0\0\0\0\0"..., 65536) = 90
brk(NULL) = 0xe03000
brk(0xe2e000) = 0xe2e000
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])

--

--

No responses yet