From: Vitaliy Makkoveev Subject: Re: tcp(4): unlock accept(2) To: Alexander Bluhm Cc: OpenBSD tech Date: Sun, 5 Jan 2025 15:24:21 +0300 > On 5 Jan 2025, at 13:49, Alexander Bluhm wrote: > > On Sun, Jan 05, 2025 at 12:08:15AM +0300, Vitaliy Makkoveev wrote: >> Makes sense because accept(2) could be fast path. tcp_accept() is the >> only in_setpeeraddr() which copies `inp_fport' and `inp_faddr' to passed >> mbuf(9). Shared netlock with `so_lock' taken is pretty enough, but both >> sockets should be locked simultaneously. >> >> So yet another solock() variation. The second arg of doaccept_so*lock() >> controls shared netlock acquisition and release. tcp(4) sockets are >> PR_MPSOCKET sockets, so soleep_nsec() will be happy. We have some raw >> inet6 sockets which are not PR_MPSOCKET, but they never follow this >> path. > > The inet6 sockets without PR_MPSOCKET are irrelvant. They have no > pr_usrreqs, socket layer is handled by rip6. I have a diff that > removes PR_MPSOCKET completely. > >> Note, we modify `so_qlen' of listening socket but filt_soread() takes >> only shared netlock. This should be enough because we cache `so_qlen' >> to local variable. > > Yes, I did put a READ_ONCE(so->so_qlen) there. It seems we could avoid socket lock at least in filt_sowrite() and filt_soexcept(). > > OK bluhm@ > >> Index: sys/kern/uipc_syscalls.c >> =================================================================== >> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v >> diff -u -p -r1.220 uipc_syscalls.c >> --- sys/kern/uipc_syscalls.c 5 Nov 2024 09:14:19 -0000 1.220 >> +++ sys/kern/uipc_syscalls.c 4 Jan 2025 20:33:31 -0000 >> @@ -247,6 +247,34 @@ sys_accept4(struct proc *p, void *v, reg >> SCARG(uap, anamelen), SCARG(uap, flags), retval)); >> } >> >> +void >> +doaccept_solock(struct socket *so, int take_netlock) >> +{ >> + if (take_netlock) { >> + switch (so->so_proto->pr_domain->dom_family) { >> + case PF_INET: >> + case PF_INET6: >> + NET_LOCK_SHARED(); >> + } >> + } >> + >> + rw_enter_write(&so->so_lock); >> +} >> + >> +void >> +doaccept_sounlock(struct socket *so, int release_netlock) >> +{ >> + rw_exit_write(&so->so_lock); >> + >> + if (release_netlock) { >> + switch (so->so_proto->pr_domain->dom_family) { >> + case PF_INET: >> + case PF_INET6: >> + NET_UNLOCK_SHARED(); >> + } >> + } >> +} >> + >> int >> doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, >> int flags, register_t *retval) >> @@ -257,7 +285,7 @@ doaccept(struct proc *p, int sock, struc >> socklen_t namelen; >> int error, tmpfd; >> struct socket *head, *so; >> - int cloexec, nflag, persocket; >> + int cloexec, nflag; >> >> cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; >> >> @@ -279,9 +307,7 @@ doaccept(struct proc *p, int sock, struc >> nam = m_get(M_WAIT, MT_SONAME); >> >> head = headfp->f_data; >> - solock(head); >> - >> - persocket = solock_persocket(head); >> + doaccept_solock(head, 1); >> >> if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { >> error = EINVAL; >> @@ -315,8 +341,7 @@ doaccept(struct proc *p, int sock, struc >> */ >> so = TAILQ_FIRST(&head->so_q); >> >> - if (persocket) >> - solock(so); >> + doaccept_solock(so, 0); >> >> if (soqremque(so, 1) == 0) >> panic("accept"); >> @@ -328,8 +353,7 @@ doaccept(struct proc *p, int sock, struc >> /* connection has been removed from the listen queue */ >> knote(&head->so_rcv.sb_klist, 0); >> >> - if (persocket) >> - sounlock(head); >> + doaccept_sounlock(head, 0); >> >> fp->f_type = DTYPE_SOCKET; >> fp->f_flag = FREAD | FWRITE | nflag; >> @@ -338,10 +362,7 @@ doaccept(struct proc *p, int sock, struc >> >> error = soaccept(so, nam); >> >> - if (persocket) >> - sounlock(so); >> - else >> - sounlock(head); >> + doaccept_sounlock(so, 1); >> >> if (error) >> goto out; >> @@ -364,7 +385,7 @@ doaccept(struct proc *p, int sock, struc >> return 0; >> >> out_unlock: >> - sounlock(head); >> + doaccept_sounlock(head, 1); >> out: >> fdplock(fdp); >> fdremove(fdp, tmpfd);