Download raw body.
tcp(4): unlock accept(2)
On Sun, Jan 05, 2025 at 12:08:15AM +0300, Vitaliy Makkoveev wrote:
> Makes sense because accept(2) could be fast path. tcp_accept() is the
> only in_setpeeraddr() which copies `inp_fport' and `inp_faddr' to passed
> mbuf(9). Shared netlock with `so_lock' taken is pretty enough, but both
> sockets should be locked simultaneously.
>
> So yet another solock() variation. The second arg of doaccept_so*lock()
> controls shared netlock acquisition and release. tcp(4) sockets are
> PR_MPSOCKET sockets, so soleep_nsec() will be happy. We have some raw
> inet6 sockets which are not PR_MPSOCKET, but they never follow this
> path.
The inet6 sockets without PR_MPSOCKET are irrelvant. They have no
pr_usrreqs, socket layer is handled by rip6. I have a diff that
removes PR_MPSOCKET completely.
> Note, we modify `so_qlen' of listening socket but filt_soread() takes
> only shared netlock. This should be enough because we cache `so_qlen'
> to local variable.
Yes, I did put a READ_ONCE(so->so_qlen) there.
OK bluhm@
> Index: sys/kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> diff -u -p -r1.220 uipc_syscalls.c
> --- sys/kern/uipc_syscalls.c 5 Nov 2024 09:14:19 -0000 1.220
> +++ sys/kern/uipc_syscalls.c 4 Jan 2025 20:33:31 -0000
> @@ -247,6 +247,34 @@ sys_accept4(struct proc *p, void *v, reg
> SCARG(uap, anamelen), SCARG(uap, flags), retval));
> }
>
> +void
> +doaccept_solock(struct socket *so, int take_netlock)
> +{
> + if (take_netlock) {
> + switch (so->so_proto->pr_domain->dom_family) {
> + case PF_INET:
> + case PF_INET6:
> + NET_LOCK_SHARED();
> + }
> + }
> +
> + rw_enter_write(&so->so_lock);
> +}
> +
> +void
> +doaccept_sounlock(struct socket *so, int release_netlock)
> +{
> + rw_exit_write(&so->so_lock);
> +
> + if (release_netlock) {
> + switch (so->so_proto->pr_domain->dom_family) {
> + case PF_INET:
> + case PF_INET6:
> + NET_UNLOCK_SHARED();
> + }
> + }
> +}
> +
> int
> doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
> int flags, register_t *retval)
> @@ -257,7 +285,7 @@ doaccept(struct proc *p, int sock, struc
> socklen_t namelen;
> int error, tmpfd;
> struct socket *head, *so;
> - int cloexec, nflag, persocket;
> + int cloexec, nflag;
>
> cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
>
> @@ -279,9 +307,7 @@ doaccept(struct proc *p, int sock, struc
> nam = m_get(M_WAIT, MT_SONAME);
>
> head = headfp->f_data;
> - solock(head);
> -
> - persocket = solock_persocket(head);
> + doaccept_solock(head, 1);
>
> if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
> error = EINVAL;
> @@ -315,8 +341,7 @@ doaccept(struct proc *p, int sock, struc
> */
> so = TAILQ_FIRST(&head->so_q);
>
> - if (persocket)
> - solock(so);
> + doaccept_solock(so, 0);
>
> if (soqremque(so, 1) == 0)
> panic("accept");
> @@ -328,8 +353,7 @@ doaccept(struct proc *p, int sock, struc
> /* connection has been removed from the listen queue */
> knote(&head->so_rcv.sb_klist, 0);
>
> - if (persocket)
> - sounlock(head);
> + doaccept_sounlock(head, 0);
>
> fp->f_type = DTYPE_SOCKET;
> fp->f_flag = FREAD | FWRITE | nflag;
> @@ -338,10 +362,7 @@ doaccept(struct proc *p, int sock, struc
>
> error = soaccept(so, nam);
>
> - if (persocket)
> - sounlock(so);
> - else
> - sounlock(head);
> + doaccept_sounlock(so, 1);
>
> if (error)
> goto out;
> @@ -364,7 +385,7 @@ doaccept(struct proc *p, int sock, struc
> return 0;
>
> out_unlock:
> - sounlock(head);
> + doaccept_sounlock(head, 1);
> out:
> fdplock(fdp);
> fdremove(fdp, tmpfd);
tcp(4): unlock accept(2)