Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
TCP port reuse at bind and connect
To:
tech@openbsd.org
Date:
Thu, 21 Mar 2024 22:58:26 +0100

Download raw body.

Thread
Hi,

One of our customers creates a lot of TCP connections and runs into
port reuse before maximum segment lifetime has expired.  I would
like to avoid that by selecting local ports smarter.

Using random ports in such a scenario is not feasable, so we switched
to linear port selection.  This makes port reuse interval longer
and more predictable.  Unfortunately this is not enough.

At customer connections are outgoing via multiple interfaces from
several local addresses.  My idea is to hash the local addresses
and have multiple linear port selection counter.  Default is our
usual random port algorithm, the new feature is activated by sysctl.
The sysctl also specifies the hash size.  Hash algorithm is very
primitive.  It is not security relevant and usual SIP hash is rather
slow.

Do we want this feature in OpenBSD?

On top of that I found another problem.  in_pcbconnect() does not
pass down the address it got from in_pcbselsrc() to in_pcbpickport().
As a consequence local port numbers selected during connect(2) must
be unique although they belong to different addresses.  This kind
of uniqueness is not necessary and wastes more ports.

To solve this, I pass ina from in_pcbconnect() to in_pcbbind_locked().
This does not interfere how wildcard sockets are matched with
specific sockets during bind(2).  It only allows non-wildcard sockets
to share a local port during connect(2).

I can split the latter bugfix from the diff if wanted.  The problem
is just hard to see and test if all ports are random.

ok?

bluhm

Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.296 in_pcb.c
--- netinet/in_pcb.c	29 Feb 2024 12:01:59 -0000	1.296
+++ netinet/in_pcb.c	21 Mar 2024 20:38:57 -0000
@@ -80,6 +80,7 @@
 #include <sys/mount.h>
 #include <sys/pool.h>
 #include <sys/proc.h>
+#include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -193,6 +194,8 @@ in_pcbinit(struct inpcbtable *table, int
 	table->inpt_size = hashsize;
 	arc4random_buf(&table->inpt_key, sizeof(table->inpt_key));
 	arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey));
+	table->inpt_prevlports = NULL;
+	table->inpt_lportsize = 0;
 }
 
 /*
@@ -277,12 +280,12 @@ in_pcballoc(struct socket *so, struct in
 }
 
 int
-in_pcbbind_locked(struct inpcb *inp, struct mbuf *nam, struct proc *p)
+in_pcbbind_locked(struct inpcb *inp, struct mbuf *nam, const void *laddr,
+    struct proc *p)
 {
 	struct socket *so = inp->inp_socket;
 	u_int16_t lport = 0;
 	int wild = 0;
-	const void *laddr = &zeroin46_addr;
 	int error;
 
 	if (inp->inp_lport)
@@ -359,7 +362,7 @@ in_pcbbind(struct inpcb *inp, struct mbu
 
 	/* keep lookup, modification, and rehash in sync */
 	mtx_enter(&table->inpt_mtx);
-	error = in_pcbbind_locked(inp, nam, p);
+	error = in_pcbbind_locked(inp, nam, &zeroin46_addr, p);
 	mtx_leave(&table->inpt_mtx);
 
 	return error;
@@ -444,6 +447,36 @@ in_pcbaddrisavail(const struct inpcb *in
 }
 
 int
+in_pcbsysctl_lport(struct inpcbtable *table, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	uint16_t *ports = NULL;
+	int error, size;
+
+	size = READ_ONCE(table->inpt_lportsize);
+	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &size,
+	    0, 0x10000);
+	if (error || newp == NULL)
+		return error;
+
+	if (size > 0) {
+		ports = mallocarray(size, sizeof(uint16_t), M_PCB, M_WAITOK);
+		if (ports == NULL)
+			return ENOMEM;
+		arc4random_buf(ports, size * sizeof(uint16_t));
+	}
+
+	mtx_enter(&table->inpt_mtx);
+	free(table->inpt_prevlports, M_PCB,
+	    table->inpt_lportsize * sizeof(uint16_t));
+	table->inpt_prevlports = ports;
+	table->inpt_lportsize = size;
+	mtx_leave(&table->inpt_mtx);
+
+	return 0;
+}
+
+int
 in_pcbpickport(u_int16_t *lport, const void *laddr, int wild,
     const struct inpcb *inp, struct proc *p)
 {
@@ -452,6 +485,7 @@ in_pcbpickport(u_int16_t *lport, const v
 	struct inpcb *t;
 	u_int16_t first, last, lower, higher, candidate, localport;
 	int count;
+	uint32_t addrhash;
 
 	MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
 
@@ -481,7 +515,24 @@ in_pcbpickport(u_int16_t *lport, const v
 	 */
 
 	count = higher - lower;
-	candidate = lower + arc4random_uniform(count);
+	if (table->inpt_prevlports != NULL) {
+		addrhash = rtable_l2(inp->inp_rtableid);
+#ifdef INET6
+		if (ISSET(inp->inp_flags, INP_IPV6)) {
+			const struct in6_addr *laddr6 = laddr;
+
+			addrhash ^= laddr6->s6_addr32[0];
+			addrhash ^= laddr6->s6_addr32[1];
+			addrhash ^= laddr6->s6_addr32[2];
+			addrhash ^= laddr6->s6_addr32[3];
+		} else
+#endif
+			addrhash ^= ((const struct in_addr *)laddr)->s_addr;
+		addrhash ^= addrhash >> 16;
+		addrhash %= table->inpt_lportsize;
+		candidate = table->inpt_prevlports[addrhash];
+	} else
+		candidate = lower + arc4random_uniform(count);
 
 	do {
 		do {
@@ -496,6 +547,8 @@ in_pcbpickport(u_int16_t *lport, const v
 		    inp->inp_rtableid, IN_PCBLOCK_HOLD);
 	} while (t != NULL);
 	*lport = localport;
+	if (table->inpt_prevlports != NULL)
+		table->inpt_prevlports[addrhash] = candidate;
 
 	return (0);
 }
@@ -542,7 +595,7 @@ in_pcbconnect(struct inpcb *inp, struct 
 
 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
 		if (inp->inp_lport == 0) {
-			error = in_pcbbind_locked(inp, NULL, curproc);
+			error = in_pcbbind_locked(inp, NULL, &ina, curproc);
 			if (error) {
 				mtx_leave(&table->inpt_mtx);
 				return (error);
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.152 in_pcb.h
--- netinet/in_pcb.h	13 Feb 2024 12:22:09 -0000	1.152
+++ netinet/in_pcb.h	21 Mar 2024 12:27:57 -0000
@@ -203,6 +203,8 @@ struct inpcbtable {
 	SIPHASH_KEY inpt_key, inpt_lkey;	/* [I] secrets for hashes */
 	u_long	inpt_mask, inpt_lmask;		/* [t] hash masks */
 	int	inpt_count, inpt_size;		/* [t] queue count, hash size */
+	uint16_t *inpt_prevlports;		/* [t] array of used ports */
+	int	inpt_lportsize;			/* [t] local port hash size */
 };
 
 /* flags in inp_flags: */
@@ -307,7 +309,8 @@ extern int in_pcbnotifymiss;
 void	 in_init(void);
 void	 in_losing(struct inpcb *);
 int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
-int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, struct proc *);
+int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *,
+	    struct proc *);
 int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
 int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
 	    struct proc *);
@@ -359,6 +362,8 @@ int	 in_rootonly(u_int16_t, u_int16_t);
 int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
 struct rtentry *
 	in_pcbrtentry(struct inpcb *);
+int
+in_pcbsysctl_lport(struct inpcbtable *, void *, size_t *, void *, size_t);
 
 /* INET6 stuff */
 struct rtentry *
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.230 tcp_usrreq.c
--- netinet/tcp_usrreq.c	11 Feb 2024 01:27:45 -0000	1.230
+++ netinet/tcp_usrreq.c	21 Mar 2024 12:27:57 -0000
@@ -1481,6 +1481,12 @@ tcp_sysctl(int *name, u_int namelen, voi
 		NET_UNLOCK();
 		return (error);
 
+	case TCPCTL_LPORT_SIZE:
+		/* Linear port selection based on hash of local address. */
+		error = in_pcbsysctl_lport(&tcbtable, oldp, oldlenp, newp,
+		    newlen);
+		return (error);
+
 	default:
 		NET_LOCK();
 		error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.176 tcp_var.h
--- netinet/tcp_var.h	13 Feb 2024 12:22:09 -0000	1.176
+++ netinet/tcp_var.h	21 Mar 2024 12:27:57 -0000
@@ -483,7 +483,8 @@ struct	tcpstat {
 #define TCPCTL_ROOTONLY	       24 /* return root only port bitmap */
 #define	TCPCTL_SYN_HASH_SIZE   25 /* number of buckets in the hash */
 #define	TCPCTL_TSO	       26 /* enable TCP segmentation offload */
-#define	TCPCTL_MAXID	       27
+#define	TCPCTL_LPORT_SIZE      27 /* linear port selection hash size */
+#define	TCPCTL_MAXID	       28
 
 #define	TCPCTL_NAMES { \
 	{ 0, 0 }, \
@@ -513,6 +514,7 @@ struct	tcpstat {
 	{ "rootonly",	CTLTYPE_STRUCT }, \
 	{ "synhashsize",	CTLTYPE_INT }, \
 	{ "tso",	CTLTYPE_INT }, \
+	{ "linearport",	CTLTYPE_INT }, \
 }
 
 struct tcp_ident_mapping {
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.141 in6_pcb.c
--- netinet6/in6_pcb.c	29 Feb 2024 12:01:59 -0000	1.141
+++ netinet6/in6_pcb.c	21 Mar 2024 12:27:57 -0000
@@ -313,7 +313,7 @@ in6_pcbconnect(struct inpcb *inp, struct
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
 		if (inp->inp_lport == 0) {
-			error = in_pcbbind_locked(inp, NULL, curproc);
+			error = in_pcbbind_locked(inp, NULL, in6a, curproc);
 			if (error) {
 				mtx_leave(&table->inpt_mtx);
 				return (error);