From: Alexander Bluhm Subject: TCP race in syn cache get To: tech@openbsd.org Date: Fri, 2 May 2025 23:14:01 +0200 Hi, When testing my TCP input unlocking diff, Mark Patruck found a race. Setting the local and foreign address of a newly created socket did not happen atomically. During socket setup there was a small window for an incpb that had a bound laddr, but faddr was emtpy. Although both listen and new socket are locked during syn_cache_get(), in_pcblookup_listen() could find the incpb of the new socket. When a SYN packet of another connection arrived in parallel, it was processed with the socket under construction instead of the listen socket. My fix below sets both faddr and laddr together in in_pcbset_addr(). The relevant code has been copied from in_pcbconnect(). Mutex table->inpt_mtx guarantees that in_pcblookup_listen() finds the listen socket. ok? bluhm Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.312 in_pcb.c --- netinet/in_pcb.c 12 Feb 2025 21:28:10 -0000 1.312 +++ netinet/in_pcb.c 2 May 2025 20:07:01 -0000 @@ -1330,33 +1330,50 @@ in_pcbset_rtableid(struct inpcb *inp, u_ return (0); } -void -in_pcbset_laddr(struct inpcb *inp, const struct sockaddr *sa, u_int rtableid) +int +in_pcbset_addr(struct inpcb *inp, const struct sockaddr *fsa, + const struct sockaddr *lsa, u_int rtableid) { struct inpcbtable *table = inp->inp_table; + const struct sockaddr_in *fsin, *lsin; + struct inpcb *t; - mtx_enter(&table->inpt_mtx); - inp->inp_rtableid = rtableid; #ifdef INET6 if (ISSET(inp->inp_flags, INP_IPV6)) { - const struct sockaddr_in6 *sin6; - - KASSERT(sa->sa_family == AF_INET6); - sin6 = satosin6_const(sa); - inp->inp_lport = sin6->sin6_port; - inp->inp_laddr6 = sin6->sin6_addr; - } else + KASSERT(fsa->sa_family == AF_INET6); + KASSERT(lsa->sa_family == AF_INET6); + return in6_pcbset_addr(inp, satosin6_const(fsa), + satosin6_const(lsa), rtableid); + } #endif - { - const struct sockaddr_in *sin; + KASSERT(fsa->sa_family == AF_INET); + KASSERT(lsa->sa_family == AF_INET); + fsin = satosin_const(fsa); + lsin = satosin_const(lsa); + + mtx_enter(&table->inpt_mtx); - KASSERT(sa->sa_family == AF_INET); - sin = satosin_const(sa); - inp->inp_lport = sin->sin_port; - inp->inp_laddr = sin->sin_addr; + t = in_pcblookup_lock(inp->inp_table, fsin->sin_addr, fsin->sin_port, + lsin->sin_addr, lsin->sin_port, rtableid, IN_PCBLOCK_HOLD); + if (t != NULL) { + mtx_leave(&table->inpt_mtx); + return (EADDRINUSE); } + + inp->inp_rtableid = rtableid; + inp->inp_laddr = lsin->sin_addr; + inp->inp_lport = lsin->sin_port; + inp->inp_faddr = fsin->sin_addr; + inp->inp_fport = fsin->sin_port; in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); + +#if NSTOEPLITZ > 0 + inp->inp_flowid = stoeplitz_ip4port(inp->inp_faddr.s_addr, + inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport); +#endif + return (0); } void Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.166 in_pcb.h --- netinet/in_pcb.h 2 Mar 2025 21:28:32 -0000 1.166 +++ netinet/in_pcb.h 2 May 2025 20:10:52 -0000 @@ -323,8 +323,8 @@ struct inpcb * void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *, struct inpcb_iterator *); struct inpcb * - in_pcblookup(struct inpcbtable *, struct in_addr, - u_int, struct in_addr, u_int, u_int); + in_pcblookup(struct inpcbtable *, struct in_addr, u_int, + struct in_addr, u_int, u_int); struct inpcb * in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int, struct mbuf *, u_int); @@ -373,7 +373,10 @@ void in6_pcbnotify(struct inpcbtable *, void (*)(struct inpcb *, int)); int in6_selecthlim(const struct inpcb *); int in_pcbset_rtableid(struct inpcb *, u_int); -void in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int); +int in_pcbset_addr(struct inpcb *, const struct sockaddr *, + const struct sockaddr *, u_int); +int in6_pcbset_addr(struct inpcb *, const struct sockaddr_in6 *, + const struct sockaddr_in6 *, u_int); void in_pcbunset_faddr(struct inpcb *); void in_pcbunset_laddr(struct inpcb *); Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.442 tcp_input.c --- netinet/tcp_input.c 29 Apr 2025 20:31:42 -0000 1.442 +++ netinet/tcp_input.c 2 May 2025 17:19:53 -0000 @@ -3542,7 +3542,6 @@ syn_cache_get(struct sockaddr *src, stru struct socket *listenso; struct inpcb *inp, *listeninp; struct tcpcb *tp = NULL; - struct mbuf *am; u_int rtableid; NET_ASSERT_LOCKED(); @@ -3629,24 +3628,14 @@ syn_cache_get(struct sockaddr *src, stru rtableid = divert->rdomain; } #endif - in_pcbset_laddr(inp, dst, rtableid); + if (in_pcbset_addr(inp, src, dst, rtableid)) + goto resetandabort; /* * Give the new socket our cached route reference. */ inp->inp_route = sc->sc_route; /* struct assignment */ sc->sc_route.ro_rt = NULL; - - am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ - if (am == NULL) - goto resetandabort; - am->m_len = src->sa_len; - memcpy(mtod(am, caddr_t), src, src->sa_len); - if (in_pcbconnect(inp, am)) { - (void) m_free(am); - goto resetandabort; - } - (void) m_free(am); tp->t_flags = intotcpcb(listeninp)->t_flags & (TF_NOPUSH|TF_NODELAY); if (sc->sc_request_r_scale != 15) { Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.147 in6_pcb.c --- netinet6/in6_pcb.c 12 Feb 2025 21:28:11 -0000 1.147 +++ netinet6/in6_pcb.c 2 May 2025 20:11:22 -0000 @@ -336,9 +336,10 @@ in6_pcbconnect(struct inpcb *inp, struct mtx_leave(&table->inpt_mtx); inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK; - if (ip6_auto_flowlabel) + if (ip6_auto_flowlabel) { inp->inp_flowinfo |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); + } #if NSTOEPLITZ > 0 inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6, &inp->inp_laddr6, inp->inp_fport, inp->inp_lport); @@ -702,4 +703,42 @@ in6_pcblookup_listen(struct inpcbtable * } #endif return (inp); +} + +int +in6_pcbset_addr(struct inpcb *inp, const struct sockaddr_in6 *fsin6, + const struct sockaddr_in6 *lsin6, u_int rtableid) +{ + struct inpcbtable *table = inp->inp_table; + struct inpcb *t; + + mtx_enter(&table->inpt_mtx); + + t = in6_pcblookup_lock(inp->inp_table, &fsin6->sin6_addr, + fsin6->sin6_port, &lsin6->sin6_addr, lsin6->sin6_port, + rtableid, IN_PCBLOCK_HOLD); + if (t != NULL) { + mtx_leave(&table->inpt_mtx); + return (EADDRINUSE); + } + + inp->inp_rtableid = rtableid; + inp->inp_laddr6 = lsin6->sin6_addr; + inp->inp_lport = lsin6->sin6_port; + inp->inp_faddr6 = fsin6->sin6_addr; + inp->inp_fport = fsin6->sin6_port; + in_pcbrehash(inp); + + mtx_leave(&table->inpt_mtx); + + inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK; + if (ip6_auto_flowlabel) { + inp->inp_flowinfo |= + (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); + } +#if NSTOEPLITZ > 0 + inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6, + &inp->inp_laddr6, inp->inp_fport, inp->inp_lport); +#endif + return (0); }