From: Vitaliy Makkoveev Subject: tcp(4): unlock accept(2) To: Alexander Bluhm , tech@openbsd.org Date: Sun, 5 Jan 2025 00:08:15 +0300 Makes sense because accept(2) could be fast path. tcp_accept() is the only in_setpeeraddr() which copies `inp_fport' and `inp_faddr' to passed mbuf(9). Shared netlock with `so_lock' taken is pretty enough, but both sockets should be locked simultaneously. So yet another solock() variation. The second arg of doaccept_so*lock() controls shared netlock acquisition and release. tcp(4) sockets are PR_MPSOCKET sockets, so soleep_nsec() will be happy. We have some raw inet6 sockets which are not PR_MPSOCKET, but they never follow this path. Note, we modify `so_qlen' of listening socket but filt_soread() takes only shared netlock. This should be enough because we cache `so_qlen' to local variable. Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v diff -u -p -r1.220 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 5 Nov 2024 09:14:19 -0000 1.220 +++ sys/kern/uipc_syscalls.c 4 Jan 2025 20:33:31 -0000 @@ -247,6 +247,34 @@ sys_accept4(struct proc *p, void *v, reg SCARG(uap, anamelen), SCARG(uap, flags), retval)); } +void +doaccept_solock(struct socket *so, int take_netlock) +{ + if (take_netlock) { + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_LOCK_SHARED(); + } + } + + rw_enter_write(&so->so_lock); +} + +void +doaccept_sounlock(struct socket *so, int release_netlock) +{ + rw_exit_write(&so->so_lock); + + if (release_netlock) { + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_UNLOCK_SHARED(); + } + } +} + int doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, int flags, register_t *retval) @@ -257,7 +285,7 @@ doaccept(struct proc *p, int sock, struc socklen_t namelen; int error, tmpfd; struct socket *head, *so; - int cloexec, nflag, persocket; + int cloexec, nflag; cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; @@ -279,9 +307,7 @@ doaccept(struct proc *p, int sock, struc nam = m_get(M_WAIT, MT_SONAME); head = headfp->f_data; - solock(head); - - persocket = solock_persocket(head); + doaccept_solock(head, 1); if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; @@ -315,8 +341,7 @@ doaccept(struct proc *p, int sock, struc */ so = TAILQ_FIRST(&head->so_q); - if (persocket) - solock(so); + doaccept_solock(so, 0); if (soqremque(so, 1) == 0) panic("accept"); @@ -328,8 +353,7 @@ doaccept(struct proc *p, int sock, struc /* connection has been removed from the listen queue */ knote(&head->so_rcv.sb_klist, 0); - if (persocket) - sounlock(head); + doaccept_sounlock(head, 0); fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD | FWRITE | nflag; @@ -338,10 +362,7 @@ doaccept(struct proc *p, int sock, struc error = soaccept(so, nam); - if (persocket) - sounlock(so); - else - sounlock(head); + doaccept_sounlock(so, 1); if (error) goto out; @@ -364,7 +385,7 @@ doaccept(struct proc *p, int sock, struc return 0; out_unlock: - sounlock(head); + doaccept_sounlock(head, 1); out: fdplock(fdp); fdremove(fdp, tmpfd);