From: Alexander Bluhm Subject: TCP port reuse at bind and connect To: tech@openbsd.org Date: Thu, 21 Mar 2024 22:58:26 +0100 Hi, One of our customers creates a lot of TCP connections and runs into port reuse before maximum segment lifetime has expired. I would like to avoid that by selecting local ports smarter. Using random ports in such a scenario is not feasable, so we switched to linear port selection. This makes port reuse interval longer and more predictable. Unfortunately this is not enough. At customer connections are outgoing via multiple interfaces from several local addresses. My idea is to hash the local addresses and have multiple linear port selection counter. Default is our usual random port algorithm, the new feature is activated by sysctl. The sysctl also specifies the hash size. Hash algorithm is very primitive. It is not security relevant and usual SIP hash is rather slow. Do we want this feature in OpenBSD? On top of that I found another problem. in_pcbconnect() does not pass down the address it got from in_pcbselsrc() to in_pcbpickport(). As a consequence local port numbers selected during connect(2) must be unique although they belong to different addresses. This kind of uniqueness is not necessary and wastes more ports. To solve this, I pass ina from in_pcbconnect() to in_pcbbind_locked(). This does not interfere how wildcard sockets are matched with specific sockets during bind(2). It only allows non-wildcard sockets to share a local port during connect(2). I can split the latter bugfix from the diff if wanted. The problem is just hard to see and test if all ports are random. ok? bluhm Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.296 in_pcb.c --- netinet/in_pcb.c 29 Feb 2024 12:01:59 -0000 1.296 +++ netinet/in_pcb.c 21 Mar 2024 20:38:57 -0000 @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -193,6 +194,8 @@ in_pcbinit(struct inpcbtable *table, int table->inpt_size = hashsize; arc4random_buf(&table->inpt_key, sizeof(table->inpt_key)); arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey)); + table->inpt_prevlports = NULL; + table->inpt_lportsize = 0; } /* @@ -277,12 +280,12 @@ in_pcballoc(struct socket *so, struct in } int -in_pcbbind_locked(struct inpcb *inp, struct mbuf *nam, struct proc *p) +in_pcbbind_locked(struct inpcb *inp, struct mbuf *nam, const void *laddr, + struct proc *p) { struct socket *so = inp->inp_socket; u_int16_t lport = 0; int wild = 0; - const void *laddr = &zeroin46_addr; int error; if (inp->inp_lport) @@ -359,7 +362,7 @@ in_pcbbind(struct inpcb *inp, struct mbu /* keep lookup, modification, and rehash in sync */ mtx_enter(&table->inpt_mtx); - error = in_pcbbind_locked(inp, nam, p); + error = in_pcbbind_locked(inp, nam, &zeroin46_addr, p); mtx_leave(&table->inpt_mtx); return error; @@ -444,6 +447,36 @@ in_pcbaddrisavail(const struct inpcb *in } int +in_pcbsysctl_lport(struct inpcbtable *table, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + uint16_t *ports = NULL; + int error, size; + + size = READ_ONCE(table->inpt_lportsize); + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &size, + 0, 0x10000); + if (error || newp == NULL) + return error; + + if (size > 0) { + ports = mallocarray(size, sizeof(uint16_t), M_PCB, M_WAITOK); + if (ports == NULL) + return ENOMEM; + arc4random_buf(ports, size * sizeof(uint16_t)); + } + + mtx_enter(&table->inpt_mtx); + free(table->inpt_prevlports, M_PCB, + table->inpt_lportsize * sizeof(uint16_t)); + table->inpt_prevlports = ports; + table->inpt_lportsize = size; + mtx_leave(&table->inpt_mtx); + + return 0; +} + +int in_pcbpickport(u_int16_t *lport, const void *laddr, int wild, const struct inpcb *inp, struct proc *p) { @@ -452,6 +485,7 @@ in_pcbpickport(u_int16_t *lport, const v struct inpcb *t; u_int16_t first, last, lower, higher, candidate, localport; int count; + uint32_t addrhash; MUTEX_ASSERT_LOCKED(&table->inpt_mtx); @@ -481,7 +515,24 @@ in_pcbpickport(u_int16_t *lport, const v */ count = higher - lower; - candidate = lower + arc4random_uniform(count); + if (table->inpt_prevlports != NULL) { + addrhash = rtable_l2(inp->inp_rtableid); +#ifdef INET6 + if (ISSET(inp->inp_flags, INP_IPV6)) { + const struct in6_addr *laddr6 = laddr; + + addrhash ^= laddr6->s6_addr32[0]; + addrhash ^= laddr6->s6_addr32[1]; + addrhash ^= laddr6->s6_addr32[2]; + addrhash ^= laddr6->s6_addr32[3]; + } else +#endif + addrhash ^= ((const struct in_addr *)laddr)->s_addr; + addrhash ^= addrhash >> 16; + addrhash %= table->inpt_lportsize; + candidate = table->inpt_prevlports[addrhash]; + } else + candidate = lower + arc4random_uniform(count); do { do { @@ -496,6 +547,8 @@ in_pcbpickport(u_int16_t *lport, const v inp->inp_rtableid, IN_PCBLOCK_HOLD); } while (t != NULL); *lport = localport; + if (table->inpt_prevlports != NULL) + table->inpt_prevlports[addrhash] = candidate; return (0); } @@ -542,7 +595,7 @@ in_pcbconnect(struct inpcb *inp, struct if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) { - error = in_pcbbind_locked(inp, NULL, curproc); + error = in_pcbbind_locked(inp, NULL, &ina, curproc); if (error) { mtx_leave(&table->inpt_mtx); return (error); Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.152 in_pcb.h --- netinet/in_pcb.h 13 Feb 2024 12:22:09 -0000 1.152 +++ netinet/in_pcb.h 21 Mar 2024 12:27:57 -0000 @@ -203,6 +203,8 @@ struct inpcbtable { SIPHASH_KEY inpt_key, inpt_lkey; /* [I] secrets for hashes */ u_long inpt_mask, inpt_lmask; /* [t] hash masks */ int inpt_count, inpt_size; /* [t] queue count, hash size */ + uint16_t *inpt_prevlports; /* [t] array of used ports */ + int inpt_lportsize; /* [t] local port hash size */ }; /* flags in inp_flags: */ @@ -307,7 +309,8 @@ extern int in_pcbnotifymiss; void in_init(void); void in_losing(struct inpcb *); int in_pcballoc(struct socket *, struct inpcbtable *, int); -int in_pcbbind_locked(struct inpcb *, struct mbuf *, struct proc *); +int in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *, + struct proc *); int in_pcbbind(struct inpcb *, struct mbuf *, struct proc *); int in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int, struct proc *); @@ -359,6 +362,8 @@ int in_rootonly(u_int16_t, u_int16_t); int in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *); struct rtentry * in_pcbrtentry(struct inpcb *); +int +in_pcbsysctl_lport(struct inpcbtable *, void *, size_t *, void *, size_t); /* INET6 stuff */ struct rtentry * Index: netinet/tcp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v diff -u -p -r1.230 tcp_usrreq.c --- netinet/tcp_usrreq.c 11 Feb 2024 01:27:45 -0000 1.230 +++ netinet/tcp_usrreq.c 21 Mar 2024 12:27:57 -0000 @@ -1481,6 +1481,12 @@ tcp_sysctl(int *name, u_int namelen, voi NET_UNLOCK(); return (error); + case TCPCTL_LPORT_SIZE: + /* Linear port selection based on hash of local address. */ + error = in_pcbsysctl_lport(&tcbtable, oldp, oldlenp, newp, + newlen); + return (error); + default: NET_LOCK(); error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars), Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.176 tcp_var.h --- netinet/tcp_var.h 13 Feb 2024 12:22:09 -0000 1.176 +++ netinet/tcp_var.h 21 Mar 2024 12:27:57 -0000 @@ -483,7 +483,8 @@ struct tcpstat { #define TCPCTL_ROOTONLY 24 /* return root only port bitmap */ #define TCPCTL_SYN_HASH_SIZE 25 /* number of buckets in the hash */ #define TCPCTL_TSO 26 /* enable TCP segmentation offload */ -#define TCPCTL_MAXID 27 +#define TCPCTL_LPORT_SIZE 27 /* linear port selection hash size */ +#define TCPCTL_MAXID 28 #define TCPCTL_NAMES { \ { 0, 0 }, \ @@ -513,6 +514,7 @@ struct tcpstat { { "rootonly", CTLTYPE_STRUCT }, \ { "synhashsize", CTLTYPE_INT }, \ { "tso", CTLTYPE_INT }, \ + { "linearport", CTLTYPE_INT }, \ } struct tcp_ident_mapping { Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.141 in6_pcb.c --- netinet6/in6_pcb.c 29 Feb 2024 12:01:59 -0000 1.141 +++ netinet6/in6_pcb.c 21 Mar 2024 12:27:57 -0000 @@ -313,7 +313,7 @@ in6_pcbconnect(struct inpcb *inp, struct if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { if (inp->inp_lport == 0) { - error = in_pcbbind_locked(inp, NULL, curproc); + error = in_pcbbind_locked(inp, NULL, in6a, curproc); if (error) { mtx_leave(&table->inpt_mtx); return (error);