Download raw body.
TCP race in syn cache get
Hi,
When testing my TCP input unlocking diff, Mark Patruck found a race.
Setting the local and foreign address of a newly created socket did
not happen atomically. During socket setup there was a small window
for an incpb that had a bound laddr, but faddr was emtpy. Although
both listen and new socket are locked during syn_cache_get(),
in_pcblookup_listen() could find the incpb of the new socket. When
a SYN packet of another connection arrived in parallel, it was
processed with the socket under construction instead of the listen
socket.
My fix below sets both faddr and laddr together in in_pcbset_addr().
The relevant code has been copied from in_pcbconnect(). Mutex
table->inpt_mtx guarantees that in_pcblookup_listen() finds the
listen socket.
ok?
bluhm
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.312 in_pcb.c
--- netinet/in_pcb.c 12 Feb 2025 21:28:10 -0000 1.312
+++ netinet/in_pcb.c 2 May 2025 20:07:01 -0000
@@ -1330,33 +1330,50 @@ in_pcbset_rtableid(struct inpcb *inp, u_
return (0);
}
-void
-in_pcbset_laddr(struct inpcb *inp, const struct sockaddr *sa, u_int rtableid)
+int
+in_pcbset_addr(struct inpcb *inp, const struct sockaddr *fsa,
+ const struct sockaddr *lsa, u_int rtableid)
{
struct inpcbtable *table = inp->inp_table;
+ const struct sockaddr_in *fsin, *lsin;
+ struct inpcb *t;
- mtx_enter(&table->inpt_mtx);
- inp->inp_rtableid = rtableid;
#ifdef INET6
if (ISSET(inp->inp_flags, INP_IPV6)) {
- const struct sockaddr_in6 *sin6;
-
- KASSERT(sa->sa_family == AF_INET6);
- sin6 = satosin6_const(sa);
- inp->inp_lport = sin6->sin6_port;
- inp->inp_laddr6 = sin6->sin6_addr;
- } else
+ KASSERT(fsa->sa_family == AF_INET6);
+ KASSERT(lsa->sa_family == AF_INET6);
+ return in6_pcbset_addr(inp, satosin6_const(fsa),
+ satosin6_const(lsa), rtableid);
+ }
#endif
- {
- const struct sockaddr_in *sin;
+ KASSERT(fsa->sa_family == AF_INET);
+ KASSERT(lsa->sa_family == AF_INET);
+ fsin = satosin_const(fsa);
+ lsin = satosin_const(lsa);
+
+ mtx_enter(&table->inpt_mtx);
- KASSERT(sa->sa_family == AF_INET);
- sin = satosin_const(sa);
- inp->inp_lport = sin->sin_port;
- inp->inp_laddr = sin->sin_addr;
+ t = in_pcblookup_lock(inp->inp_table, fsin->sin_addr, fsin->sin_port,
+ lsin->sin_addr, lsin->sin_port, rtableid, IN_PCBLOCK_HOLD);
+ if (t != NULL) {
+ mtx_leave(&table->inpt_mtx);
+ return (EADDRINUSE);
}
+
+ inp->inp_rtableid = rtableid;
+ inp->inp_laddr = lsin->sin_addr;
+ inp->inp_lport = lsin->sin_port;
+ inp->inp_faddr = fsin->sin_addr;
+ inp->inp_fport = fsin->sin_port;
in_pcbrehash(inp);
+
mtx_leave(&table->inpt_mtx);
+
+#if NSTOEPLITZ > 0
+ inp->inp_flowid = stoeplitz_ip4port(inp->inp_faddr.s_addr,
+ inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport);
+#endif
+ return (0);
}
void
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.166 in_pcb.h
--- netinet/in_pcb.h 2 Mar 2025 21:28:32 -0000 1.166
+++ netinet/in_pcb.h 2 May 2025 20:10:52 -0000
@@ -323,8 +323,8 @@ struct inpcb *
void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
struct inpcb_iterator *);
struct inpcb *
- in_pcblookup(struct inpcbtable *, struct in_addr,
- u_int, struct in_addr, u_int, u_int);
+ in_pcblookup(struct inpcbtable *, struct in_addr, u_int,
+ struct in_addr, u_int, u_int);
struct inpcb *
in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
struct mbuf *, u_int);
@@ -373,7 +373,10 @@ void in6_pcbnotify(struct inpcbtable *,
void (*)(struct inpcb *, int));
int in6_selecthlim(const struct inpcb *);
int in_pcbset_rtableid(struct inpcb *, u_int);
-void in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
+int in_pcbset_addr(struct inpcb *, const struct sockaddr *,
+ const struct sockaddr *, u_int);
+int in6_pcbset_addr(struct inpcb *, const struct sockaddr_in6 *,
+ const struct sockaddr_in6 *, u_int);
void in_pcbunset_faddr(struct inpcb *);
void in_pcbunset_laddr(struct inpcb *);
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.442 tcp_input.c
--- netinet/tcp_input.c 29 Apr 2025 20:31:42 -0000 1.442
+++ netinet/tcp_input.c 2 May 2025 17:19:53 -0000
@@ -3542,7 +3542,6 @@ syn_cache_get(struct sockaddr *src, stru
struct socket *listenso;
struct inpcb *inp, *listeninp;
struct tcpcb *tp = NULL;
- struct mbuf *am;
u_int rtableid;
NET_ASSERT_LOCKED();
@@ -3629,24 +3628,14 @@ syn_cache_get(struct sockaddr *src, stru
rtableid = divert->rdomain;
}
#endif
- in_pcbset_laddr(inp, dst, rtableid);
+ if (in_pcbset_addr(inp, src, dst, rtableid))
+ goto resetandabort;
/*
* Give the new socket our cached route reference.
*/
inp->inp_route = sc->sc_route; /* struct assignment */
sc->sc_route.ro_rt = NULL;
-
- am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
- if (am == NULL)
- goto resetandabort;
- am->m_len = src->sa_len;
- memcpy(mtod(am, caddr_t), src, src->sa_len);
- if (in_pcbconnect(inp, am)) {
- (void) m_free(am);
- goto resetandabort;
- }
- (void) m_free(am);
tp->t_flags = intotcpcb(listeninp)->t_flags & (TF_NOPUSH|TF_NODELAY);
if (sc->sc_request_r_scale != 15) {
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.147 in6_pcb.c
--- netinet6/in6_pcb.c 12 Feb 2025 21:28:11 -0000 1.147
+++ netinet6/in6_pcb.c 2 May 2025 20:11:22 -0000
@@ -336,9 +336,10 @@ in6_pcbconnect(struct inpcb *inp, struct
mtx_leave(&table->inpt_mtx);
inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
- if (ip6_auto_flowlabel)
+ if (ip6_auto_flowlabel) {
inp->inp_flowinfo |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+ }
#if NSTOEPLITZ > 0
inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
&inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
@@ -702,4 +703,42 @@ in6_pcblookup_listen(struct inpcbtable *
}
#endif
return (inp);
+}
+
+int
+in6_pcbset_addr(struct inpcb *inp, const struct sockaddr_in6 *fsin6,
+ const struct sockaddr_in6 *lsin6, u_int rtableid)
+{
+ struct inpcbtable *table = inp->inp_table;
+ struct inpcb *t;
+
+ mtx_enter(&table->inpt_mtx);
+
+ t = in6_pcblookup_lock(inp->inp_table, &fsin6->sin6_addr,
+ fsin6->sin6_port, &lsin6->sin6_addr, lsin6->sin6_port,
+ rtableid, IN_PCBLOCK_HOLD);
+ if (t != NULL) {
+ mtx_leave(&table->inpt_mtx);
+ return (EADDRINUSE);
+ }
+
+ inp->inp_rtableid = rtableid;
+ inp->inp_laddr6 = lsin6->sin6_addr;
+ inp->inp_lport = lsin6->sin6_port;
+ inp->inp_faddr6 = fsin6->sin6_addr;
+ inp->inp_fport = fsin6->sin6_port;
+ in_pcbrehash(inp);
+
+ mtx_leave(&table->inpt_mtx);
+
+ inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
+ if (ip6_auto_flowlabel) {
+ inp->inp_flowinfo |=
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+ }
+#if NSTOEPLITZ > 0
+ inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
+ &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
+#endif
+ return (0);
}
TCP race in syn cache get