Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
tcp(4): unlock accept(2)
To:
Alexander Bluhm <bluhm@openbsd.org>, tech@openbsd.org
Date:
Sun, 5 Jan 2025 00:08:15 +0300

Download raw body.

Thread
Makes sense because accept(2) could be fast path. tcp_accept() is the
only in_setpeeraddr() which copies `inp_fport' and `inp_faddr' to passed
mbuf(9). Shared netlock with `so_lock' taken is pretty enough, but both
sockets should be locked simultaneously.

So yet another solock() variation. The second arg of doaccept_so*lock()
controls shared netlock acquisition and release. tcp(4) sockets are
PR_MPSOCKET sockets, so soleep_nsec() will be happy. We have some raw
inet6 sockets which are not PR_MPSOCKET, but they never follow this
path.

Note, we modify `so_qlen' of listening socket but filt_soread() takes
only shared netlock. This should be enough because we cache `so_qlen' 
to local variable.

Index: sys/kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
diff -u -p -r1.220 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c	5 Nov 2024 09:14:19 -0000	1.220
+++ sys/kern/uipc_syscalls.c	4 Jan 2025 20:33:31 -0000
@@ -247,6 +247,34 @@ sys_accept4(struct proc *p, void *v, reg
 	    SCARG(uap, anamelen), SCARG(uap, flags), retval));
 }
 
+void
+doaccept_solock(struct socket *so, int take_netlock)
+{
+	if (take_netlock) {
+		switch (so->so_proto->pr_domain->dom_family) {
+		case PF_INET:
+		case PF_INET6:
+			NET_LOCK_SHARED();
+		}
+	}
+
+	rw_enter_write(&so->so_lock);
+}
+
+void
+doaccept_sounlock(struct socket *so, int release_netlock)
+{
+	rw_exit_write(&so->so_lock);
+
+	if (release_netlock) {
+		switch (so->so_proto->pr_domain->dom_family) {
+		case PF_INET:
+		case PF_INET6:
+			NET_UNLOCK_SHARED();
+		}
+	}
+}
+
 int
 doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
     int flags, register_t *retval)
@@ -257,7 +285,7 @@ doaccept(struct proc *p, int sock, struc
 	socklen_t namelen;
 	int error, tmpfd;
 	struct socket *head, *so;
-	int cloexec, nflag, persocket;
+	int cloexec, nflag;
 
 	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
 
@@ -279,9 +307,7 @@ doaccept(struct proc *p, int sock, struc
 	nam = m_get(M_WAIT, MT_SONAME);
 
 	head = headfp->f_data;
-	solock(head);
-
-	persocket = solock_persocket(head);
+	doaccept_solock(head, 1);
 
 	if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
@@ -315,8 +341,7 @@ doaccept(struct proc *p, int sock, struc
 	 */
 	so = TAILQ_FIRST(&head->so_q);
 
-	if (persocket)
-		solock(so);
+	doaccept_solock(so, 0);
 
 	if (soqremque(so, 1) == 0)
 		panic("accept");
@@ -328,8 +353,7 @@ doaccept(struct proc *p, int sock, struc
 	/* connection has been removed from the listen queue */
 	knote(&head->so_rcv.sb_klist, 0);
 
-	if (persocket)
-		sounlock(head);
+	doaccept_sounlock(head, 0);
 
 	fp->f_type = DTYPE_SOCKET;
 	fp->f_flag = FREAD | FWRITE | nflag;
@@ -338,10 +362,7 @@ doaccept(struct proc *p, int sock, struc
 
 	error = soaccept(so, nam);
 
-	if (persocket)
-		sounlock(so);
-	else
-		sounlock(head);
+	doaccept_sounlock(so, 1);
 
 	if (error)
 		goto out;
@@ -364,7 +385,7 @@ doaccept(struct proc *p, int sock, struc
 	return 0;
 
 out_unlock:
-	sounlock(head);
+	doaccept_sounlock(head, 1);
 out:
 	fdplock(fdp);
 	fdremove(fdp, tmpfd);