Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
TCP race in syn cache get
To:
tech@openbsd.org
Date:
Fri, 2 May 2025 23:14:01 +0200

Download raw body.

Thread
Hi,

When testing my TCP input unlocking diff, Mark Patruck found a race.

Setting the local and foreign address of a newly created socket did
not happen atomically.  During socket setup there was a small window
for an incpb that had a bound laddr, but faddr was emtpy.  Although
both listen and new socket are locked during syn_cache_get(),
in_pcblookup_listen() could find the incpb of the new socket.  When
a SYN packet of another connection arrived in parallel, it was
processed with the socket under construction instead of the listen
socket.

My fix below sets both faddr and laddr together in in_pcbset_addr().
The relevant code has been copied from in_pcbconnect().  Mutex
table->inpt_mtx guarantees that in_pcblookup_listen() finds the
listen socket.

ok?

bluhm

Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.312 in_pcb.c
--- netinet/in_pcb.c	12 Feb 2025 21:28:10 -0000	1.312
+++ netinet/in_pcb.c	2 May 2025 20:07:01 -0000
@@ -1330,33 +1330,50 @@ in_pcbset_rtableid(struct inpcb *inp, u_
 	return (0);
 }
 
-void
-in_pcbset_laddr(struct inpcb *inp, const struct sockaddr *sa, u_int rtableid)
+int
+in_pcbset_addr(struct inpcb *inp, const struct sockaddr *fsa,
+    const struct sockaddr *lsa, u_int rtableid)
 {
 	struct inpcbtable *table = inp->inp_table;
+	const struct sockaddr_in *fsin, *lsin;
+	struct inpcb *t;
 
-	mtx_enter(&table->inpt_mtx);
-	inp->inp_rtableid = rtableid;
 #ifdef INET6
 	if (ISSET(inp->inp_flags, INP_IPV6)) {
-		const struct sockaddr_in6 *sin6;
-
-		KASSERT(sa->sa_family == AF_INET6);
-		sin6 = satosin6_const(sa);
-		inp->inp_lport = sin6->sin6_port;
-		inp->inp_laddr6 = sin6->sin6_addr;
-	} else
+		KASSERT(fsa->sa_family == AF_INET6);
+		KASSERT(lsa->sa_family == AF_INET6);
+		return in6_pcbset_addr(inp, satosin6_const(fsa),
+		    satosin6_const(lsa), rtableid);
+	}
 #endif
-	{
-		const struct sockaddr_in *sin;
+	KASSERT(fsa->sa_family == AF_INET);
+	KASSERT(lsa->sa_family == AF_INET);
+	fsin = satosin_const(fsa);
+	lsin = satosin_const(lsa);
+
+	mtx_enter(&table->inpt_mtx);
 
-		KASSERT(sa->sa_family == AF_INET);
-		sin = satosin_const(sa);
-		inp->inp_lport = sin->sin_port;
-		inp->inp_laddr = sin->sin_addr;
+	t = in_pcblookup_lock(inp->inp_table, fsin->sin_addr, fsin->sin_port,
+	    lsin->sin_addr, lsin->sin_port, rtableid, IN_PCBLOCK_HOLD);
+	if (t != NULL) {
+		mtx_leave(&table->inpt_mtx);
+		return (EADDRINUSE);
 	}
+
+	inp->inp_rtableid = rtableid;
+	inp->inp_laddr = lsin->sin_addr;
+	inp->inp_lport = lsin->sin_port;
+	inp->inp_faddr = fsin->sin_addr;
+	inp->inp_fport = fsin->sin_port;
 	in_pcbrehash(inp);
+
 	mtx_leave(&table->inpt_mtx);
+
+#if NSTOEPLITZ > 0
+	inp->inp_flowid = stoeplitz_ip4port(inp->inp_faddr.s_addr,
+	    inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport);
+#endif
+	return (0);
 }
 
 void
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.166 in_pcb.h
--- netinet/in_pcb.h	2 Mar 2025 21:28:32 -0000	1.166
+++ netinet/in_pcb.h	2 May 2025 20:10:52 -0000
@@ -323,8 +323,8 @@ struct inpcb *
 void	 in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
 	    struct inpcb_iterator *);
 struct inpcb *
-	 in_pcblookup(struct inpcbtable *, struct in_addr,
-			       u_int, struct in_addr, u_int, u_int);
+	 in_pcblookup(struct inpcbtable *, struct in_addr, u_int,
+	    struct in_addr, u_int, u_int);
 struct inpcb *
 	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
 	    struct mbuf *, u_int);
@@ -373,7 +373,10 @@ void	in6_pcbnotify(struct inpcbtable *, 
 	void (*)(struct inpcb *, int));
 int	in6_selecthlim(const struct inpcb *);
 int	in_pcbset_rtableid(struct inpcb *, u_int);
-void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
+int	in_pcbset_addr(struct inpcb *, const struct sockaddr *,
+	    const struct sockaddr *, u_int);
+int	in6_pcbset_addr(struct inpcb *, const struct sockaddr_in6 *,
+	    const struct sockaddr_in6 *, u_int);
 void	in_pcbunset_faddr(struct inpcb *);
 void	in_pcbunset_laddr(struct inpcb *);
 
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.442 tcp_input.c
--- netinet/tcp_input.c	29 Apr 2025 20:31:42 -0000	1.442
+++ netinet/tcp_input.c	2 May 2025 17:19:53 -0000
@@ -3542,7 +3542,6 @@ syn_cache_get(struct sockaddr *src, stru
 	struct socket *listenso;
 	struct inpcb *inp, *listeninp;
 	struct tcpcb *tp = NULL;
-	struct mbuf *am;
 	u_int rtableid;
 
 	NET_ASSERT_LOCKED();
@@ -3629,24 +3628,14 @@ syn_cache_get(struct sockaddr *src, stru
 		rtableid = divert->rdomain;
 	}
 #endif
-	in_pcbset_laddr(inp, dst, rtableid);
+	if (in_pcbset_addr(inp, src, dst, rtableid))
+		goto resetandabort;
 
 	/*
 	 * Give the new socket our cached route reference.
 	 */
 	inp->inp_route = sc->sc_route;		/* struct assignment */
 	sc->sc_route.ro_rt = NULL;
-
-	am = m_get(M_DONTWAIT, MT_SONAME);	/* XXX */
-	if (am == NULL)
-		goto resetandabort;
-	am->m_len = src->sa_len;
-	memcpy(mtod(am, caddr_t), src, src->sa_len);
-	if (in_pcbconnect(inp, am)) {
-		(void) m_free(am);
-		goto resetandabort;
-	}
-	(void) m_free(am);
 
 	tp->t_flags = intotcpcb(listeninp)->t_flags & (TF_NOPUSH|TF_NODELAY);
 	if (sc->sc_request_r_scale != 15) {
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.147 in6_pcb.c
--- netinet6/in6_pcb.c	12 Feb 2025 21:28:11 -0000	1.147
+++ netinet6/in6_pcb.c	2 May 2025 20:11:22 -0000
@@ -336,9 +336,10 @@ in6_pcbconnect(struct inpcb *inp, struct
 	mtx_leave(&table->inpt_mtx);
 
 	inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
-	if (ip6_auto_flowlabel)
+	if (ip6_auto_flowlabel) {
 		inp->inp_flowinfo |=
 		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+	}
 #if NSTOEPLITZ > 0
 	inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
 	    &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
@@ -702,4 +703,42 @@ in6_pcblookup_listen(struct inpcbtable *
 	}
 #endif
 	return (inp);
+}
+
+int
+in6_pcbset_addr(struct inpcb *inp, const struct sockaddr_in6 *fsin6,
+    const struct sockaddr_in6 *lsin6, u_int rtableid)
+{
+	struct inpcbtable *table = inp->inp_table;
+	struct inpcb *t;
+
+	mtx_enter(&table->inpt_mtx);
+
+	t = in6_pcblookup_lock(inp->inp_table, &fsin6->sin6_addr,
+	    fsin6->sin6_port, &lsin6->sin6_addr, lsin6->sin6_port,
+	    rtableid, IN_PCBLOCK_HOLD);
+	if (t != NULL) {
+		mtx_leave(&table->inpt_mtx);
+		return (EADDRINUSE);
+	}
+
+	inp->inp_rtableid = rtableid;
+	inp->inp_laddr6 = lsin6->sin6_addr;
+	inp->inp_lport = lsin6->sin6_port;
+	inp->inp_faddr6 = fsin6->sin6_addr;
+	inp->inp_fport = fsin6->sin6_port;
+	in_pcbrehash(inp);
+
+	mtx_leave(&table->inpt_mtx);
+
+	inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
+	if (ip6_auto_flowlabel) {
+		inp->inp_flowinfo |=
+		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+	}
+#if NSTOEPLITZ > 0
+	inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
+	    &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
+#endif
+	return (0);
 }