From: Alexander Bluhm Subject: Re: tcp(4): unlock accept(2) To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Sun, 5 Jan 2025 11:49:50 +0100 On Sun, Jan 05, 2025 at 12:08:15AM +0300, Vitaliy Makkoveev wrote: > Makes sense because accept(2) could be fast path. tcp_accept() is the > only in_setpeeraddr() which copies `inp_fport' and `inp_faddr' to passed > mbuf(9). Shared netlock with `so_lock' taken is pretty enough, but both > sockets should be locked simultaneously. > > So yet another solock() variation. The second arg of doaccept_so*lock() > controls shared netlock acquisition and release. tcp(4) sockets are > PR_MPSOCKET sockets, so soleep_nsec() will be happy. We have some raw > inet6 sockets which are not PR_MPSOCKET, but they never follow this > path. The inet6 sockets without PR_MPSOCKET are irrelvant. They have no pr_usrreqs, socket layer is handled by rip6. I have a diff that removes PR_MPSOCKET completely. > Note, we modify `so_qlen' of listening socket but filt_soread() takes > only shared netlock. This should be enough because we cache `so_qlen' > to local variable. Yes, I did put a READ_ONCE(so->so_qlen) there. OK bluhm@ > Index: sys/kern/uipc_syscalls.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > diff -u -p -r1.220 uipc_syscalls.c > --- sys/kern/uipc_syscalls.c 5 Nov 2024 09:14:19 -0000 1.220 > +++ sys/kern/uipc_syscalls.c 4 Jan 2025 20:33:31 -0000 > @@ -247,6 +247,34 @@ sys_accept4(struct proc *p, void *v, reg > SCARG(uap, anamelen), SCARG(uap, flags), retval)); > } > > +void > +doaccept_solock(struct socket *so, int take_netlock) > +{ > + if (take_netlock) { > + switch (so->so_proto->pr_domain->dom_family) { > + case PF_INET: > + case PF_INET6: > + NET_LOCK_SHARED(); > + } > + } > + > + rw_enter_write(&so->so_lock); > +} > + > +void > +doaccept_sounlock(struct socket *so, int release_netlock) > +{ > + rw_exit_write(&so->so_lock); > + > + if (release_netlock) { > + switch (so->so_proto->pr_domain->dom_family) { > + case PF_INET: > + case PF_INET6: > + NET_UNLOCK_SHARED(); > + } > + } > +} > + > int > doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, > int flags, register_t *retval) > @@ -257,7 +285,7 @@ doaccept(struct proc *p, int sock, struc > socklen_t namelen; > int error, tmpfd; > struct socket *head, *so; > - int cloexec, nflag, persocket; > + int cloexec, nflag; > > cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; > > @@ -279,9 +307,7 @@ doaccept(struct proc *p, int sock, struc > nam = m_get(M_WAIT, MT_SONAME); > > head = headfp->f_data; > - solock(head); > - > - persocket = solock_persocket(head); > + doaccept_solock(head, 1); > > if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { > error = EINVAL; > @@ -315,8 +341,7 @@ doaccept(struct proc *p, int sock, struc > */ > so = TAILQ_FIRST(&head->so_q); > > - if (persocket) > - solock(so); > + doaccept_solock(so, 0); > > if (soqremque(so, 1) == 0) > panic("accept"); > @@ -328,8 +353,7 @@ doaccept(struct proc *p, int sock, struc > /* connection has been removed from the listen queue */ > knote(&head->so_rcv.sb_klist, 0); > > - if (persocket) > - sounlock(head); > + doaccept_sounlock(head, 0); > > fp->f_type = DTYPE_SOCKET; > fp->f_flag = FREAD | FWRITE | nflag; > @@ -338,10 +362,7 @@ doaccept(struct proc *p, int sock, struc > > error = soaccept(so, nam); > > - if (persocket) > - sounlock(so); > - else > - sounlock(head); > + doaccept_sounlock(so, 1); > > if (error) > goto out; > @@ -364,7 +385,7 @@ doaccept(struct proc *p, int sock, struc > return 0; > > out_unlock: > - sounlock(head); > + doaccept_sounlock(head, 1); > out: > fdplock(fdp); > fdremove(fdp, tmpfd);