From: Alexander Bluhm Subject: Re: Remove `head' socket relocking from sonewconn() To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Wed, 10 Apr 2024 13:39:00 +0200 On Tue, Apr 09, 2024 at 03:42:04PM +0300, Vitaliy Makkoveev wrote: > uipc_attach() releases solock() because it should be taken after > `unp_gc_lock' rwlock(9) which protects the `unp_link' list. For this > reason, the listening `head' socket should be unlocked too while > sonewconn() calls uipc_attach(). This could be reworked because now > `so_rcv' sockbuf relies on `sb_mtx' mutex(9). > > The last one `unp_link' foreach loop within unp_gc() discards sockets > previously marked as UNP_GCDEAD. These sockets are not accessed from the > userland. The only exception is the sosend() threads of connected > sending peers, but they only sbappend*() mbuf(9) to `so_rcv'. So it's > enough to unlink mbuf(9) chain with `sb_mtx' held and discard lockless. > > Please note, the existing SS_NEWCONN_WAIT logic was never used because > the listening unix(4) socket protected from concurrent unp_detach() by > vnode(9) lock, however `head' relocked all times. > > This diff conflicts with my "Don't take solock() in soreceive..." [1] > diff, but since it stuck I want to commit this one first. > > 1. https://marc.info/?t=171191840300002&r=1&w=2 passed regress with witness OK bluhm@ > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.327 > diff -u -p -r1.327 uipc_socket.c > --- sys/kern/uipc_socket.c 2 Apr 2024 14:23:15 -0000 1.327 > +++ sys/kern/uipc_socket.c 9 Apr 2024 10:01:52 -0000 > @@ -65,6 +65,7 @@ void sotask(void *); > void soreaper(void *); > void soput(void *); > int somove(struct socket *, int); > +void sorflush(struct socket *); > > void filt_sordetach(struct knote *kn); > int filt_soread(struct knote *kn, long hint); > @@ -413,15 +414,6 @@ drop: > } > if (so->so_options & SO_ACCEPTCONN) { > int persocket = solock_persocket(so); > - > - if (persocket) { > - /* Wait concurrent sonewconn() threads. */ > - while (so->so_newconn > 0) { > - so->so_state |= SS_NEWCONN_WAIT; > - sosleep_nsec(so, &so->so_newconn, PSOCK, > - "newcon", INFSLP); > - } > - } > > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { > if (persocket) > Index: sys/kern/uipc_socket2.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.147 > diff -u -p -r1.147 uipc_socket2.c > --- sys/kern/uipc_socket2.c 31 Mar 2024 13:50:00 -0000 1.147 > +++ sys/kern/uipc_socket2.c 9 Apr 2024 10:01:52 -0000 > @@ -179,7 +179,7 @@ sonewconn(struct socket *head, int conns > { > struct socket *so; > int persocket = solock_persocket(head); > - int error; > + int soqueue = connstatus ? 1 : 0; > > /* > * XXXSMP as long as `so' and `head' share the same lock, we > @@ -232,41 +232,13 @@ sonewconn(struct socket *head, int conns > > sigio_copy(&so->so_sigio, &head->so_sigio); > > - soqinsque(head, so, 0); > - > - /* > - * We need to unlock `head' because PCB layer could release > - * solock() to enforce desired lock order. > - */ > - if (persocket) { > - head->so_newconn++; > - sounlock(head); > - } > - > - error = pru_attach(so, 0, wait); > - > - if (persocket) { > - sounlock(so); > - solock(head); > - solock(so); > - > - if ((head->so_newconn--) == 0) { > - if ((head->so_state & SS_NEWCONN_WAIT) != 0) { > - head->so_state &= ~SS_NEWCONN_WAIT; > - wakeup(&head->so_newconn); > - } > - } > - } > - > - if (error) { > - soqremque(so, 0); > + soqinsque(head, so, soqueue); > + if (pru_attach(so, 0, wait) != 0) { > + soqremque(so, soqueue); > goto fail; > } > - > if (connstatus) { > so->so_state |= connstatus; > - soqremque(so, 0); > - soqinsque(head, so, 1); > sorwakeup(head); > wakeup(&head->so_timeo); > } > Index: sys/kern/uipc_usrreq.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v > retrieving revision 1.203 > diff -u -p -r1.203 uipc_usrreq.c > --- sys/kern/uipc_usrreq.c 26 Mar 2024 09:46:47 -0000 1.203 > +++ sys/kern/uipc_usrreq.c 9 Apr 2024 10:01:52 -0000 > @@ -293,14 +293,10 @@ uipc_attach(struct socket *so, int proto > so->so_pcb = unp; > getnanotime(&unp->unp_ctime); > > - /* > - * Enforce `unp_gc_lock' -> `solock()' lock order. > - */ > - sounlock(so); > rw_enter_write(&unp_gc_lock); > LIST_INSERT_HEAD(&unp_head, unp, unp_link); > rw_exit_write(&unp_gc_lock); > - solock(so); > + > return (0); > } > > @@ -753,7 +749,6 @@ unp_detach(struct unpcb *unp) > unp->unp_vnode = NULL; > > /* > - * Enforce `unp_gc_lock' -> `solock()' lock order. > * Enforce `i_lock' -> `solock()' lock order. > */ > sounlock(so); > @@ -1443,16 +1438,26 @@ unp_gc(void *arg __unused) > if (nunref) { > LIST_FOREACH(unp, &unp_head, unp_link) { > if (unp->unp_gcflags & UNP_GCDEAD) { > + struct sockbuf *sb = &unp->unp_socket->so_rcv; > + struct mbuf *m; > + > /* > * This socket could still be connected > * and if so it's `so_rcv' is still > * accessible by concurrent PRU_SEND > * thread. > */ > - so = unp->unp_socket; > - solock(so); > - sorflush(so); > - sounlock(so); > + > + mtx_enter(&sb->sb_mtx); > + m = sb->sb_mb; > + memset(&sb->sb_startzero, 0, > + (caddr_t)&sb->sb_endzero - > + (caddr_t)&sb->sb_startzero); > + sb->sb_timeo_nsecs = INFSLP; > + mtx_leave(&sb->sb_mtx); > + > + unp_scan(m, unp_discard); > + m_purge(m); > } > } > } > Index: sys/sys/socketvar.h > =================================================================== > RCS file: /cvs/src/sys/sys/socketvar.h,v > retrieving revision 1.127 > diff -u -p -r1.127 socketvar.h > --- sys/sys/socketvar.h 27 Mar 2024 22:47:53 -0000 1.127 > +++ sys/sys/socketvar.h 9 Apr 2024 10:01:52 -0000 > @@ -86,7 +86,6 @@ struct socket { > short so_q0len; /* partials on so_q0 */ > short so_qlen; /* number of connections on so_q */ > short so_qlimit; /* max number queued connections */ > - u_long so_newconn; /* # of pending sonewconn() threads */ > short so_timeo; /* connection timeout */ > u_long so_oobmark; /* chars to oob mark */ > u_int so_error; /* error affecting connection */ > @@ -169,8 +168,7 @@ struct socket { > #define SS_CONNECTOUT 0x1000 /* connect, not accept, at this end */ > #define SS_ISSENDING 0x2000 /* hint for lower layer */ > #define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */ > -#define SS_NEWCONN_WAIT 0x8000 /* waiting sonewconn() relock */ > -#define SS_YP 0x10000 /* created using ypconnect(2) */ > +#define SS_YP 0x8000 /* created using ypconnect(2) */ > > #ifdef _KERNEL > > @@ -400,7 +398,6 @@ int sosend(struct socket *, struct mbuf > struct mbuf *, struct mbuf *, int); > int sosetopt(struct socket *, int, int, struct mbuf *); > int soshutdown(struct socket *, int); > -void sorflush(struct socket *); > void sowakeup(struct socket *, struct sockbuf *); > void sorwakeup(struct socket *); > void sowwakeup(struct socket *);