Breakdown of kvmtool SLIRP Implementation

M Castelino
5 min read · Apr 11, 2019

--

Invocation

./lkvm run --kernel bzImage --disk rootfs.ext4 --debug --network mode=user

Inside the VM

ip addr add 192.168.33.2/24 dev enp0s0
ip link set up enp0s0
ip route add default via 192.168.33.1

Now you can reach the host

Implementation

Setup

837         ndev = calloc(1, sizeof(struct net_dev));
841 ops = malloc(sizeof(*ops));
847 list_add_tail(&ndev->list, &ndevs);
849 ndev->kvm = params->kvm;
850 ndev->params = params;
852 mutex_init(&ndev->mutex);
853 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
854 ndev->config.status = VIRTIO_NET_S_LINK_UP;
855 if (ndev->queue_pairs > 1)
856 ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
857
858 for (i = 0 ; i < 6 ; i++) {
859 ndev->config.mac[i] = params->guest_mac[i];
860 ndev->info.guest_mac.addr[i] = params->guest_mac[i];
861 ndev->info.host_mac.addr[i] = params->host_mac[i];
862 }
863
864 ndev->mode = params->mode;
865 if (ndev->mode == NET_MODE_TAP) {
866 ndev->ops = &tap_ops;
867 if (!virtio_net__tap_create(ndev))
868 die_perror("You have requested a TAP device, but creation of one has failed because");
869 } else {
870 ndev->info.host_ip = ntohl(inet_addr(params->host_ip));
871 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip));
872 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0"));
873 ndev->info.buf_nr = 20,
874 ndev->ops = &uip_ops;
875 uip_static_init(&ndev->info);
876 }

virtio/net.c

static struct net_dev_operations uip_ops = {
.rx = uip_ops_rx,
.tx = uip_ops_tx,
};
static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
return uip_tx(iov, out, &ndev->info);
}
static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
return uip_rx(iov, in, &ndev->info); <<< note the ndev->info
}
168 static void *virtio_net_tx_thread(void *p)
169 {
188 while (1) {
194 while (virt_queue__available(vq)) {
195 struct virtio_net_hdr *hdr;
196 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); << we get the iov here
197 hdr = iov[0].iov_base;
198 virtio_net_fix_tx_hdr(hdr, ndev);
199 len = ndev->ops->tx(iov, out, ndev); << we invoke the tx function

uip_tx

uip_tx -> case UIP_ETH_P_ARP: uip_tx_do_arp(&arg); case UIP_ETH_P_IP: uip_tx_do_ipv4(&arg);

ARP handling

(The gateway is always the host, and that should be the only ARP request we see.)

uip_tx -> uip_tx_do_arp
9 info = arg->info;
10 buf = uip_buf_clone(arg); <<<< grab a device to guest buffer. buffer state goes from FREE -> INUSE
11
12 arp = (struct uip_arp *)(arg->eth);
13 arp2 = (struct uip_arp *)(buf->eth);
14
15 /*
16 * ARP replay code: 2 <<< typo, reply
17 */
18 arp2->op = htons(0x2);
19 arp2->dmac = arp->smac;
20 arp2->dip = arp->sip;
21
22 if (arp->dip == htonl(info->host_ip)) { <<<<< This is the gateway ARP. Pretty much the only one we should see
23 arp2->smac = info->host_mac;
24 arp2->sip = htonl(info->host_ip);
25
26 uip_buf_set_used(info, buf); <<< buffer is now set to USED, which I assume means ready to transmit
27 }

uip_rx

92 static void *virtio_net_rx_thread(void *p)
112 while (1) {
118 while (virt_queue__available(vq)) {
127 len = ndev->ops->rx(&dummy_iov, 1, ndev);
154 virt_queue__used_idx_advance(vq, num_buffers);
156 /* We should interrupt guest right now, otherwise latency is huge. */
157 if (virtio_queue__should_signal(vq))
158 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
{
/*
* Sleep until there is a buffer for guest
*/
buf = uip_buf_get_used(info); <<< sleeps until info->buf_used_nr > 0, then walks the info->buf_head list until it finds the first buffer in state UIP_BUF_STATUS_USED. This is how the packet we queued on the tx path makes it back to the guest via rx
memcpy_toiovecend(iov, buf->vnet, 0, buf->vnet_len);
memcpy_toiovecend(iov, buf->eth, buf->vnet_len, buf->eth_len);
uip_buf_set_free(info, buf);
return len;
}

Handling of IPv4 Traffic

3 int uip_tx_do_ipv4(struct uip_tx_arg *arg)
4 {
14 switch (ip->proto) {
15 case UIP_IP_P_ICMP:
16 uip_tx_do_ipv4_icmp(arg);
17 break;
18 case UIP_IP_P_TCP:
19 uip_tx_do_ipv4_tcp(arg);
20 break;
21 case UIP_IP_P_UDP:
22 uip_tx_do_ipv4_udp(arg);
23 break;
24 default:
25 break;
26 }

ICMP

Provide an ICMP reply

3 int uip_tx_do_ipv4_icmp(struct uip_tx_arg *arg)
4 {
9 buf = uip_buf_clone(arg); <<<< grab a device to guest buffer
15 ip2->sip = ip->dip; <<<< flip it
16 ip2->dip = ip->sip;
18 /*
19 * ICMP reply: 0 <<<< Type 0 — Echo Reply, so an ICMP reply
20 */
21 icmp2->type = 0;
23 ip2->csum = uip_csum_ip(ip2);
24 icmp2->csum = uip_csum_icmp(icmp2);
26 uip_buf_set_used(arg->info, buf); <<< queue the reply
29 }

UDP

204 int uip_tx_do_ipv4_udp(struct uip_tx_arg *arg)
205 {
212 udp = (struct uip_udp *)(arg->eth);
213 ip = (struct uip_ip *)(arg->eth);
216 if (uip_udp_is_dhcp(udp)) {
217 uip_tx_do_ipv4_udp_dhcp(arg);
221 /*
222 * Find socket we have allocated before, otherwise allocate one
223 */
224 sk = uip_udp_socket_find(arg, ip->sip, ip->dip, udp->sport, udp->dport);
231 ret = uip_udp_socket_send(sk, udp);
>>>> ret = sendto(sk->fd, udp->payload, len, 0, (struct sockaddr *)&sk->addr, sizeof(sk->addr));
>>>> just send the UDP *data* out over the process socket
>>>> Also add sk->fd to the epoll set, i.e. the list of fds that we wait on (via epoll_wait) for replies
234
235 if (!info->udp_thread)
236 pthread_create(&info->udp_thread, NULL, uip_udp_socket_thread, (void *)info);
>>>>> This thread waits for replies from all the opened UDP sockets
153 static void *uip_udp_socket_thread(void *p)
154 {
155 struct epoll_event events[UIP_UDP_MAX_EVENTS]; /// 1000 today
172 while (1) {
173 nfds = epoll_wait(info->udp_epollfd, events, UIP_UDP_MAX_EVENTS, -1);
178 for (i = 0; i < nfds; i++) {
181 payload_len = recvfrom(sk->fd, payload, UIP_MAX_UDP_PAYLOAD, 0, NULL, NULL);
188 buf = uip_buf_get_free(info);
190 uip_udp_make_pkg(info, sk, buf, payload, payload_len);
>>> Adds ethernet header
>>> Adds IP header
>>> Adds UDP header
195 uip_buf_set_used(info, buf);
>>>> queue it to the guest
196 }
197 }
198

TCP Handling

260 int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
261 {
270 /*
271 * Guest is trying to start a TCP session, let's fake SYN-ACK to guest
272 */
273 if (uip_tcp_is_syn(tcp)) {
274 sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
>>> Sets up a proxy connection
>>> Adds the host side socket to the list
278 sk->window_size = ntohs(tcp->win);
288 uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0);
>>> Send a SYN-ACK to the guest
289 sk->seq_server += 1;
294 uip_tcp_socket_receive(sk);
>>> There is a single TCP handler thread that handles all the sockets
297 }
298
299 /*
302 sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
307 sk->window_size = ntohs(tcp->win);
308 sk->guest_acked = ntohl(tcp->ack);
312 if (uip_tcp_is_fin(tcp)) {
316 sk->write_done = 1;
317 sk->ack_server += 1;
318 uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
323 uip_tcp_socket_close(sk, SHUT_WR);
326 }
327
331 if (uip_tcp_payloadlen(tcp) == 0)
332 goto out;
337 ret = uip_tcp_socket_send(sk, tcp);
344 uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
>>> Send ACK back to the guest
348 }

--

--