From: Vitaliy Makkoveev Subject: Re: keep socket lock in sonewconn To: Alexander Bluhm Cc: tech@openbsd.org Date: Sat, 25 Jan 2025 18:37:49 +0300 On Fri, Jan 24, 2025 at 10:02:09AM +0100, Alexander Bluhm wrote: > Hi, > > For TCP input unlocking we need a consistent lock of a newly created > socket. Instead of releasing the lock in sonewconn() and grabbing > it again later, I think it is better that sonewconn() returns a > locked socket. > > Currently I only changed syn_cache_get() which calls in_pcbsounlock_rele() > at the end. Following diffs will push the unlock into tcp_input(). > > ok? UNIX sockets are much complicated in this path, because we need to handle 3 sockets and keep right locking order between them. Also, at his stage newly created UNIX socket linked to `so_q0' and accessible only from garbage collector, which will not close it. However, this is not true for TCP socket, because connection is already established and concurrent FIN or RST packet could start demolition. ok mvs, but keep in mind locking order between sockets, we always lock listening socket aka `head' first. Isn't it better to pass `oldso' to sonewconn()? Does the `head' looks more reasonable here? > + oldso = so; > + oldinp = inp; > so = sonewconn(so, SS_ISCONNECTED, M_DONTWAIT); > > bluhm > > Index: kern/uipc_socket2.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v > diff -u -p -r1.168 uipc_socket2.c > --- kern/uipc_socket2.c 22 Jan 2025 15:05:49 -0000 1.168 > +++ kern/uipc_socket2.c 23 Jan 2025 23:07:02 -0000 > @@ -237,8 +237,6 @@ sonewconn(struct socket *head, int conns > wakeup(&head->so_timeo); > } > > - sounlock_nonet(so); > - > return (so); > > fail: > Index: kern/uipc_usrreq.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_usrreq.c,v > diff -u -p -r1.213 uipc_usrreq.c > --- kern/uipc_usrreq.c 20 Jan 2025 16:34:48 -0000 1.213 > +++ kern/uipc_usrreq.c 23 Jan 2025 22:51:36 -0000 > @@ -890,18 +890,17 @@ unp_connect(struct socket *so, struct mb > > if ((so2->so_options & SO_ACCEPTCONN) == 0 || > (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) { > + sounlock(so2); > error = ECONNREFUSED; > - } > - > - sounlock(so2); > - > - if (error != 0) > goto put; > + } > > /* > * Since `so2' is protected by vnode(9) lock, `so3' > * can't be PRU_ABORT'ed here. > */ > + sounlock(so2); > + sounlock(so3); > solock_pair(so, so3); > > unp2 = sotounpcb(so2); > Index: netinet/tcp_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > diff -u -p -r1.423 tcp_input.c > --- netinet/tcp_input.c 16 Jan 2025 11:59:20 -0000 1.423 > +++ netinet/tcp_input.c 23 Jan 2025 23:06:45 -0000 > @@ -3535,13 +3535,15 @@ syn_cache_get(struct sockaddr *src, stru > struct inpcb *inp, *oldinp; > struct tcpcb *tp = NULL; > struct mbuf *am; > - struct socket *oso; > + struct socket *oldso; > u_int rtableid; > > NET_ASSERT_LOCKED(); > > + inp = sotoinpcb(so); > + > mtx_enter(&syn_cache_mtx); > - sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid); > + sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid); > if (sc == NULL) { > mtx_leave(&syn_cache_mtx); > return (NULL); > @@ -3571,12 +3573,13 @@ syn_cache_get(struct sockaddr *src, stru > * connection when the SYN arrived. If we can't create > * the connection, abort it. > */ > - oso = so; > + oldso = so; > + oldinp = inp; > so = sonewconn(so, SS_ISCONNECTED, M_DONTWAIT); > if (so == NULL) > goto resetandabort; > - > - oldinp = sotoinpcb(oso); > + soassertlocked(so); > + soref(so); > inp = sotoinpcb(so); > > #ifdef IPSEC > @@ -3637,7 +3640,7 @@ syn_cache_get(struct sockaddr *src, stru > (void) m_free(am); > > tp = intotcpcb(inp); > - tp->t_flags = sototcpcb(oso)->t_flags & (TF_NOPUSH|TF_NODELAY); > + tp->t_flags = sototcpcb(oldso)->t_flags & (TF_NOPUSH|TF_NODELAY); > if (sc->sc_request_r_scale != 15) { > tp->requested_s_scale = sc->sc_requested_s_scale; > tp->request_r_scale = sc->sc_request_r_scale; > @@ -3649,7 +3652,6 @@ syn_cache_get(struct sockaddr *src, stru > tp->t_template = tcp_template(tp); > if (tp->t_template == 0) { > tp = tcp_drop(tp, ENOBUFS); /* destroys socket */ > - so = NULL; > goto abort; > } > tp->sack_enable = ISSET(sc->sc_fixflags, SCF_SACK_PERMIT); > @@ -3700,6 +3702,7 @@ syn_cache_get(struct sockaddr *src, stru > tp->rcv_adv = tp->rcv_nxt + sc->sc_win; > tp->last_ack_sent = tp->rcv_nxt; > > + in_pcbsounlock_rele(inp, so); > tcpstat_inc(tcps_sc_completed); > syn_cache_put(sc); > return (so); > @@ -3707,10 +3710,11 @@ syn_cache_get(struct sockaddr *src, stru > resetandabort: > tcp_respond(NULL, mtod(m, caddr_t), th, (tcp_seq)0, th->th_ack, TH_RST, > m->m_pkthdr.ph_rtableid, now); > -abort: > - m_freem(m); > if (so != NULL) > soabort(so); > +abort: > + m_freem(m); > + in_pcbsounlock_rele(inp, so); > syn_cache_put(sc); > tcpstat_inc(tcps_sc_aborted); > return ((struct socket *)(-1)); >