Download raw body.
TCP race in syn cache get
> On 3 May 2025, at 00:14, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> Hi,
>
> When testing my TCP input unlocking diff, Mark Patruck found a race.
>
> Setting the local and foreign address of a newly created socket did
> not happen atomically. During socket setup there was a small window
> for an incpb that had a bound laddr, but faddr was emtpy. Although
> both listen and new socket are locked during syn_cache_get(),
> in_pcblookup_listen() could find the incpb of the new socket. When
> a SYN packet of another connection arrived in parallel, it was
> processed with the socket under construction instead of the listen
> socket.
>
> My fix below sets both faddr and laddr together in in_pcbset_addr().
> The relevant code has been copied from in_pcbconnect(). Mutex
> table->inpt_mtx guarantees that in_pcblookup_listen() finds the
> listen socket.
>
> ok?
>
ok mvs
> bluhm
>
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.312 in_pcb.c
> --- netinet/in_pcb.c 12 Feb 2025 21:28:10 -0000 1.312
> +++ netinet/in_pcb.c 2 May 2025 20:07:01 -0000
> @@ -1330,33 +1330,50 @@ in_pcbset_rtableid(struct inpcb *inp, u_
> return (0);
> }
>
> -void
> -in_pcbset_laddr(struct inpcb *inp, const struct sockaddr *sa, u_int rtableid)
> +int
> +in_pcbset_addr(struct inpcb *inp, const struct sockaddr *fsa,
> + const struct sockaddr *lsa, u_int rtableid)
> {
> struct inpcbtable *table = inp->inp_table;
> + const struct sockaddr_in *fsin, *lsin;
> + struct inpcb *t;
>
> - mtx_enter(&table->inpt_mtx);
> - inp->inp_rtableid = rtableid;
> #ifdef INET6
> if (ISSET(inp->inp_flags, INP_IPV6)) {
> - const struct sockaddr_in6 *sin6;
> -
> - KASSERT(sa->sa_family == AF_INET6);
> - sin6 = satosin6_const(sa);
> - inp->inp_lport = sin6->sin6_port;
> - inp->inp_laddr6 = sin6->sin6_addr;
> - } else
> + KASSERT(fsa->sa_family == AF_INET6);
> + KASSERT(lsa->sa_family == AF_INET6);
> + return in6_pcbset_addr(inp, satosin6_const(fsa),
> + satosin6_const(lsa), rtableid);
> + }
> #endif
> - {
> - const struct sockaddr_in *sin;
> + KASSERT(fsa->sa_family == AF_INET);
> + KASSERT(lsa->sa_family == AF_INET);
> + fsin = satosin_const(fsa);
> + lsin = satosin_const(lsa);
> +
> + mtx_enter(&table->inpt_mtx);
>
> - KASSERT(sa->sa_family == AF_INET);
> - sin = satosin_const(sa);
> - inp->inp_lport = sin->sin_port;
> - inp->inp_laddr = sin->sin_addr;
> + t = in_pcblookup_lock(inp->inp_table, fsin->sin_addr, fsin->sin_port,
> + lsin->sin_addr, lsin->sin_port, rtableid, IN_PCBLOCK_HOLD);
> + if (t != NULL) {
> + mtx_leave(&table->inpt_mtx);
> + return (EADDRINUSE);
> }
> +
> + inp->inp_rtableid = rtableid;
> + inp->inp_laddr = lsin->sin_addr;
> + inp->inp_lport = lsin->sin_port;
> + inp->inp_faddr = fsin->sin_addr;
> + inp->inp_fport = fsin->sin_port;
> in_pcbrehash(inp);
> +
> mtx_leave(&table->inpt_mtx);
> +
> +#if NSTOEPLITZ > 0
> + inp->inp_flowid = stoeplitz_ip4port(inp->inp_faddr.s_addr,
> + inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport);
> +#endif
> + return (0);
> }
>
> void
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> diff -u -p -r1.166 in_pcb.h
> --- netinet/in_pcb.h 2 Mar 2025 21:28:32 -0000 1.166
> +++ netinet/in_pcb.h 2 May 2025 20:10:52 -0000
> @@ -323,8 +323,8 @@ struct inpcb *
> void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
> struct inpcb_iterator *);
> struct inpcb *
> - in_pcblookup(struct inpcbtable *, struct in_addr,
> - u_int, struct in_addr, u_int, u_int);
> + in_pcblookup(struct inpcbtable *, struct in_addr, u_int,
> + struct in_addr, u_int, u_int);
> struct inpcb *
> in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
> struct mbuf *, u_int);
> @@ -373,7 +373,10 @@ void in6_pcbnotify(struct inpcbtable *,
> void (*)(struct inpcb *, int));
> int in6_selecthlim(const struct inpcb *);
> int in_pcbset_rtableid(struct inpcb *, u_int);
> -void in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
> +int in_pcbset_addr(struct inpcb *, const struct sockaddr *,
> + const struct sockaddr *, u_int);
> +int in6_pcbset_addr(struct inpcb *, const struct sockaddr_in6 *,
> + const struct sockaddr_in6 *, u_int);
> void in_pcbunset_faddr(struct inpcb *);
> void in_pcbunset_laddr(struct inpcb *);
>
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.442 tcp_input.c
> --- netinet/tcp_input.c 29 Apr 2025 20:31:42 -0000 1.442
> +++ netinet/tcp_input.c 2 May 2025 17:19:53 -0000
> @@ -3542,7 +3542,6 @@ syn_cache_get(struct sockaddr *src, stru
> struct socket *listenso;
> struct inpcb *inp, *listeninp;
> struct tcpcb *tp = NULL;
> - struct mbuf *am;
> u_int rtableid;
>
> NET_ASSERT_LOCKED();
> @@ -3629,24 +3628,14 @@ syn_cache_get(struct sockaddr *src, stru
> rtableid = divert->rdomain;
> }
> #endif
> - in_pcbset_laddr(inp, dst, rtableid);
> + if (in_pcbset_addr(inp, src, dst, rtableid))
> + goto resetandabort;
>
> /*
> * Give the new socket our cached route reference.
> */
> inp->inp_route = sc->sc_route; /* struct assignment */
> sc->sc_route.ro_rt = NULL;
> -
> - am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
> - if (am == NULL)
> - goto resetandabort;
> - am->m_len = src->sa_len;
> - memcpy(mtod(am, caddr_t), src, src->sa_len);
> - if (in_pcbconnect(inp, am)) {
> - (void) m_free(am);
> - goto resetandabort;
> - }
> - (void) m_free(am);
>
> tp->t_flags = intotcpcb(listeninp)->t_flags & (TF_NOPUSH|TF_NODELAY);
> if (sc->sc_request_r_scale != 15) {
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.147 in6_pcb.c
> --- netinet6/in6_pcb.c 12 Feb 2025 21:28:11 -0000 1.147
> +++ netinet6/in6_pcb.c 2 May 2025 20:11:22 -0000
> @@ -336,9 +336,10 @@ in6_pcbconnect(struct inpcb *inp, struct
> mtx_leave(&table->inpt_mtx);
>
> inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
> - if (ip6_auto_flowlabel)
> + if (ip6_auto_flowlabel) {
> inp->inp_flowinfo |=
> (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
> + }
> #if NSTOEPLITZ > 0
> inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
> &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
> @@ -702,4 +703,42 @@ in6_pcblookup_listen(struct inpcbtable *
> }
> #endif
> return (inp);
> +}
> +
> +int
> +in6_pcbset_addr(struct inpcb *inp, const struct sockaddr_in6 *fsin6,
> + const struct sockaddr_in6 *lsin6, u_int rtableid)
> +{
> + struct inpcbtable *table = inp->inp_table;
> + struct inpcb *t;
> +
> + mtx_enter(&table->inpt_mtx);
> +
> + t = in6_pcblookup_lock(inp->inp_table, &fsin6->sin6_addr,
> + fsin6->sin6_port, &lsin6->sin6_addr, lsin6->sin6_port,
> + rtableid, IN_PCBLOCK_HOLD);
> + if (t != NULL) {
> + mtx_leave(&table->inpt_mtx);
> + return (EADDRINUSE);
> + }
> +
> + inp->inp_rtableid = rtableid;
> + inp->inp_laddr6 = lsin6->sin6_addr;
> + inp->inp_lport = lsin6->sin6_port;
> + inp->inp_faddr6 = fsin6->sin6_addr;
> + inp->inp_fport = fsin6->sin6_port;
> + in_pcbrehash(inp);
> +
> + mtx_leave(&table->inpt_mtx);
> +
> + inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
> + if (ip6_auto_flowlabel) {
> + inp->inp_flowinfo |=
> + (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
> + }
> +#if NSTOEPLITZ > 0
> + inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
> + &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
> +#endif
> + return (0);
> }
>
TCP race in syn cache get