Skip to content

Commit

Permalink
Merge pull request #24 from m-lab/sandbox-sbs
Browse files Browse the repository at this point in the history
Initial support for quick NDT7 download test
  • Loading branch information
bassosimone authored Sep 21, 2018
2 parents 44a6b67 + 0920c88 commit 15d4d33
Show file tree
Hide file tree
Showing 10 changed files with 465 additions and 106 deletions.
175 changes: 175 additions & 0 deletions bbr/bbr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Package bbr contains code required to read BBR variables of a net.Conn
// on which we're serving a WebSocket client. This code currently only
// works on Linux systems, as BBR is only available there.
//
// To read BBR variables, we need a file descriptor. When serving a WebSocket
// client we have a websocket.Conn instance. The UnderlyingConn() method allows
// us to get the corresponding net.Conn, which typically is a tls.Conn. Yet,
// obtaining a file descriptor from a tls.Conn seems complex, because the
// underlying socket connection is private. Still, we've a custom listener that
// is required to turn on BBR (see tcpListenerEx). At that point, we can
// obtain a *os.File from the *net.TCPConn. From such *os.File, we can then
// get a file descriptor. However, there are some complications:
//
// a) the returned *os.File is bound to another file descriptor that is
// a dup() of the one inside the *net.Conn, so, we should keep that
// *os.File alive during the whole NDT7 measurement;
//
// b) in Go < 1.11, using this technique makes the file descriptor use
// blocking I/O, which spawns more threads (see below).
//
// For these reasons, we're keeping a cache mapping between the socket's four
// tuple (i.e. local and remote address and port) and the *os.File. We use the
// four tuple because, in principle, a server can be serving on more than a
// single local IP, so only using the remote endpoint may not be enough.
//
// In the good case, this is what is gonna happen:
//
// 1. a connection is accepted in tcpListenerEx, so we have a *net.TCPConn;
//
// 2. using the *net.Conn, we turn on BBR and cache the *os.File using
// bbr.EnableAndRememberFile() with the four tuple as the key;
//
// 3. WebSocket negotiation is successful, so we have a websocket.Conn, from
// which we can get the underlying connection and hence the four tuple;
//
// 4. using the four tuple, we can retrieve the *os.File, removing it from
// the cache using bbr.GetAndForgetFile();
//
// 5. we defer *os.File.Close() until the end of the WebSocket serving loop and
// periodically we use such file to obtain the file descriptor and read the
// BBR variables using bbr.GetBandwidthAndRTT().
//
// Because a connection might be closed between steps 2. and 3. (i.e. after
// the connection is accepted and before the HTTP layer finishes reading the
// request and determines that it should be routed to the handler that we
// have configured), we also need a stale entry management mechanism so that
// we delete *os.File instances cached for too much time.
//
// Depending on whether Golang calls shutdown() when a socket is closed or
// not, it might be that this caching mechanism keeps connections alive for
// more time than expected. The specific case where we can have this issue
// is the one where we receive a HTTP connection that is not a valid UPGRADE
// request, but a valid HTTP request. To avoid this issue, we SHOULD make
// sure to remove the *os.File from the cache basically everytime we got our
// handler called, regardless of whether the request is a valid UPGRADE.
package bbr

import (
"errors"
"net"
"os"
"sync"
"time"
)

// ErrNoSupport indicates that this system does not support BBR.
var ErrNoSupport = errors.New("No support for BBR")

// connKey is the key associated to a TCP connection.
type connKey string

// makekey creates a connKey from |conn|.
func makekey(conn net.Conn) connKey {
return connKey(conn.LocalAddr().String() + "<=>" + conn.RemoteAddr().String())
}

// entry is an entry inside the cache.
type entry struct {
Fp *os.File
Stamp time.Time
}

// cache maps a connKey to the corresponding *os.File.
var cache map[connKey]entry = make(map[connKey]entry)

// mutex serializes access to cache.
var mutex sync.Mutex

// lastCheck is last time when we checked the cache for stale entries.
var lastCheck time.Time

// checkInterval is the interval between each check for stale entries.
const checkInterval = 500 * time.Millisecond

// maxInactive is the amount of time after which an entry is stale.
const maxInactive = 3 * time.Second

// EnableAndRememberFile enables BBR on |tc| and remembers the associated
// *os.File for later, when we'll need it to access BBR stats.
func EnableAndRememberFile(tc *net.TCPConn) error {
// Implementation note: according to a 2013 message on golang-nuts [1], the
// code that follows is broken on Unix because calling File() makes the socket
// blocking so causing Go to use more threads and, additionally, "timer wheel
// inside net package never fires". However, an April, 19 2018 commit
// on src/net/tcpsock.go apparently has removed such restriction and so now
// (i.e. since go1.11beta1) it's safe to use the code below [2, 3].
//
// [1] https://grokbase.com/t/gg/golang-nuts/1349whs82r
//
// [2] https://github.com/golang/go/commit/60e3ebb9cba
//
// [3] https://github.com/golang/go/issues/24942
//
// TODO(bassosimone): Should we require builds using the latest version
// of Go? Warn for earlier versions? Or is this not that big a deal?
fp, err := tc.File()
if err != nil {
return err
}
err = enableBBR(fp)
if err != nil {
// Do not leak the file. It is important to stress that golang returns
// a dup()ed descriptor along with |fp|, hence the |tc| connection isn't
// closed after we've closed |fp|.
fp.Close()
return err
}
curTime := time.Now()
key := makekey(tc)
mutex.Lock()
defer mutex.Unlock()
if curTime.Sub(lastCheck) > checkInterval {
lastCheck = curTime
// Note: in Golang it's safe to remove elements from the map while
// iterating it. See <https://github.com/golang/go/issues/9926>.
for key, entry := range cache {
if curTime.Sub(entry.Stamp) > maxInactive {
entry.Fp.Close()
delete(cache, key)
}
}
}
cache[key] = entry{
Fp: fp, // This takes ownership of fp
Stamp: curTime,
}
return nil
}

// GetAndForgetFile returns the *os.File bound to |conn| that was previously
// saved with EnableAndRememberFile, or nil if no file was found. Note that you
// are given ownership of the returned file pointer. As the name implies, the
// *os.File is removed from the cache by this operation.
func GetAndForgetFile(conn net.Conn) *os.File {
key := makekey(conn)
mutex.Lock()
defer mutex.Unlock()
entry, found := cache[key]
if !found {
return nil
}
delete(cache, key)
return entry.Fp // Pass ownership to caller
}

// GetBandwidthAndRTT obtains BBR info from |fp|. The returned values are
// the max-bandwidth in bytes/s and the min-rtt in microseconds.
func GetBandwidthAndRTT(fp *os.File) (float64, float64, error) {
// Implementation note: for simplicity I have decided to use float64 here
// rather than uint64, mainly because the proper C type to use AFAICT (and
// I may be wrong here) changes between 32 and 64 bit. That is, it is not
// clear to me how to use a 64 bit integer (which I what I would have used
// by default) on a 32 bit system. So let's use float64.
return getBandwidthAndRTT(fp)
}
38 changes: 38 additions & 0 deletions bbr/bbr_linux.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include <sys/socket.h>
#include <linux/inet_diag.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <errno.h>
#include <stdint.h>
#include <string.h>

int get_bbr_info(int fd, double *bw, double *rtt) {
if (bw == NULL || rtt == NULL) {
return EINVAL; /* You passed me an invalid argument */
}
/* With old kernels, like the one that we have by default on Travis, there
is no support for BBR, which breaks the integration tests. For now we
just work around it. TODO(bassosimone): Use docker on travis? */
#ifdef TCP_CC_INFO
union tcp_cc_info ti;
memset(&ti, 0, sizeof(ti));
socklen_t tilen = sizeof(ti);
if (getsockopt(fd, IPPROTO_TCP, TCP_CC_INFO, &ti, &tilen) == -1) {
return errno; /* Whatever libc said went wrong */
}
/* Apparently, tcp_bbr_info is the only congestion control data structure
to occupy five 32 bit words. Currently, in September 2018, the other two
data structures (i.e. Vegas and DCTCP) both occupy four 32 bit words.
See include/uapi/linux/inet_diag.h in torvalds/linux@bbb6189d. */
if (tilen != sizeof(struct tcp_bbr_info)) {
return EINVAL; /* You passed me a socket that is not using TCP BBR */
}
*bw = (double)((((uint64_t)ti.bbr.bbr_bw_hi) << 32) |
((uint64_t)ti.bbr.bbr_bw_lo));
*rtt = (double)ti.bbr.bbr_min_rtt;
return 0;
#else
return ENOSYS; /* This kernel does not support getting TCP BBR info */
#endif
}
27 changes: 27 additions & 0 deletions bbr/bbr_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package bbr

// #cgo CFLAGS: -Wall -Wextra -Werror -std=c11 -Wno-unused-parameter
// #include "bbr_linux.h"
import "C"

import (
"os"
"syscall"
)

func enableBBR(fp *os.File) error {
// Note: Fd() returns uintptr but on Unix we can safely use int for sockets.
return syscall.SetsockoptString(int(fp.Fd()), syscall.IPPROTO_TCP,
syscall.TCP_CONGESTION, "bbr")
}

func getBandwidthAndRTT(fp *os.File) (float64, float64, error) {
bw := C.double(0)
rtt := C.double(0)
// Note: Fd() returns uintptr but on Unix we can safely use int for sockets.
rv := C.get_bbr_info(C.int(fp.Fd()), &bw, &rtt)
if rv != 0 {
return 0.0, 0.0, syscall.Errno(rv)
}
return float64(bw), float64(rtt), nil
}
16 changes: 16 additions & 0 deletions bbr/bbr_linux.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef M_LAB_NDT_CLOUD_BBR_BBR_LINUX_H
#define M_LAB_NDT_CLOUD_BBR_BBR_LINUX_H
#ifdef __cplusplus
extern "C" {
#endif

/* get_bbr_info retrieves BBR info from |fd| and stores them in |bw| and
|rtt| respectively. The bandwidth, |bw|, will be in bytes/s, while
the RTT, |rtt|, will be in microseconds. On success, returns zero. On
failure returns a nonzero errno value indicating the error that occurred. */
int get_bbr_info(int fd, double *bw, double *rtt);

#ifdef __cplusplus
} // extern "C"
#endif
#endif /* M_LAB_NDT_CLOUD_BBR_BBR_LINUX_H */
15 changes: 15 additions & 0 deletions bbr/bbr_stub.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// +build !linux

package bbr

import (
"os"
)

func enableBBR(*os.File) error {
return ErrNoSupport
}

func getBandwidthAndRTT(*os.File) (float64, float64, error) {
return 0.0, 0.0, ErrNoSupport
}
38 changes: 25 additions & 13 deletions ndt-server.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

"github.com/gorilla/websocket"
"github.com/m-lab/ndt-cloud/ndt7"
"github.com/m-lab/ndt-cloud/netx"
"github.com/m-lab/ndt-cloud/bbr"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
Expand Down Expand Up @@ -105,15 +105,21 @@ func init() {
prometheus.MustRegister(lameDuck)
}

// Note: Copied from net/http package.
// tcpListenerEx is the place where we accept new TCP connections and
// set specific options on such connections. Namely, we set TCP keep-alive
// timeouts on accepted connections, and we turn on TCP BBR. The former
// option is used so dead TCP connections (e.g. closing laptop mid-download)
// eventually go away. The latter is used to experiment with BBR.
// set specific options on such connections. We unconditionally set the
// keepalive timeout for all connections, so that dead TCP connections
// (e.g. laptop closed amid a download) eventually go away. If the
// TryToEnableBBR setting is true, we additionally try to (1) enable
// BBR on the socket; (2) record the *os.File bound to a *net.TCPConn
// such that later we can collect BBR stats (see the bbr package for
// more info). As the name implies, TryToEnableBBR does its best to
// enable BBR but not succeding is also acceptable especially on systems
// where there is no support for BBR.
//
// Note: Adapted from net/http package.
type tcpListenerEx struct {
*net.TCPListener
EnableBBR bool
TryToEnableBBR bool
}

func (ln tcpListenerEx) Accept() (net.Conn, error) {
Expand All @@ -123,10 +129,16 @@ func (ln tcpListenerEx) Accept() (net.Conn, error) {
}
tc.SetKeepAlive(true)
tc.SetKeepAlivePeriod(3 * time.Minute)
if ln.EnableBBR {
err = netx.EnableBBR(tc)
if err != nil {
return nil, err // Error already printed by EnableBBR()
if ln.TryToEnableBBR {
err = bbr.EnableAndRememberFile(tc)
if err != nil && err != bbr.ErrNoSupport {
// This is the case in which we compiled in BBR support but something
// was wrong when enabling BBR at runtime. TODO(bassosimone): when we'll
// have BBR support on the whole fleet, here we should probably return
// an error rather than continuing. For now we'll tolerate.
log.Printf("Cannot initialize BBR: %s", err.Error())
} else if err == bbr.ErrNoSupport {
log.Printf("Your system does not support BBR")
}
}
return tc, nil
Expand Down Expand Up @@ -444,7 +456,7 @@ func listenRandom() (net.Listener, int, error) {
return nil, 0, err
}
port := ln.Addr().(*net.TCPAddr).Port
return tcpListenerEx{TCPListener: ln, EnableBBR: false}, port, nil
return tcpListenerEx{TCPListener: ln, TryToEnableBBR: false}, port, nil
}

func manageS2cTest(ws *websocket.Conn) (float64, error) {
Expand Down Expand Up @@ -674,6 +686,6 @@ func main() {
log.Fatal(err)
}
s := &http.Server{Handler: http.DefaultServeMux}
log.Fatal(s.ServeTLS(tcpListenerEx{TCPListener: ln, EnableBBR: true},
log.Fatal(s.ServeTLS(tcpListenerEx{TCPListener: ln, TryToEnableBBR: true},
*fCertFile, *fKeyFile))
}
Loading

0 comments on commit 15d4d33

Please sign in to comment.