From: Alexander Bluhm Subject: unlock socket close To: tech@openbsd.org Date: Mon, 14 Jul 2025 20:18:07 +0200 Hi, This diff is needed to run close(2) system call with shared netlock. The unlock order in sounlock_shared() has to be swapped. The socket life time is controlled by SS_NOFDREF flag. It is protected by socket lock. As soon rw_exit_write(&so->so_lock) is called, another thread may free the socket. Reading so->so_proto->pr_domain->dom_family must be done before. When using exclusive netlock, another thread could not run. But with shared netlock, only socket lock protects the freeing. Also remove an outdated comment about freeing sockets with netlock. ok? bluhm Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.380 uipc_socket.c --- kern/uipc_socket.c 2 Jul 2025 16:44:40 -0000 1.380 +++ kern/uipc_socket.c 11 Jul 2025 21:53:59 -0000 @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i if (error) { so->so_state |= SS_NOFDREF; /* sofree() calls sounlock(). */ - soref(so); - sofree(so, 1); - sounlock_shared(so); - sorele(so); + sofree(so, 0); return (error); } sounlock_shared(so); @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock) if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } if (so->so_head) { @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock) */ if (so->so_onq == &head->so_q) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock) } if (!keep_lock) - sounlock(so); + sounlock_shared(so); sorele(so); } @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags) struct socket *so2; int error = 0; - solock(so); + solock_shared(so); /* Revoke async IO early. There is a final revocation in sofree(). */ sigio_free(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { @@ -430,7 +427,7 @@ discard: if (so->so_sp) { struct socket *soback; - sounlock(so); + sounlock_shared(so); /* * Concurrent sounsplice() locks `sb_mtx' mutexes on * both `so_snd' and `so_rcv' before unsplice sockets. @@ -477,7 +474,7 @@ notsplicedback: task_del(sosplice_taskq, &so->so_sp->ssp_task); taskq_barrier(sosplice_taskq); - solock(so); + solock_shared(so); } #endif /* SOCKET_SPLICE */ Index: kern/uipc_socket2.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v diff -u -p -r1.185 uipc_socket2.c --- kern/uipc_socket2.c 12 Mar 2025 14:08:31 -0000 1.185 +++ kern/uipc_socket2.c 11 Jul 2025 22:26:13 -0000 @@ -409,13 +409,13 @@ sounlock(struct socket *so) void sounlock_shared(struct socket *so) { - rw_exit_write(&so->so_lock); switch (so->so_proto->pr_domain->dom_family) { case PF_INET: case PF_INET6: NET_UNLOCK_SHARED(); break; } + rw_exit_write(&so->so_lock); } void @@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so) void sounlock_pair(struct socket *so1, struct socket *so2) { + switch (so1->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_UNLOCK_SHARED(); + break; + } if (so1 == so2) rw_exit_write(&so1->so_lock); else if (so1 < so2) { @@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct } else { rw_exit_write(&so1->so_lock); rw_exit_write(&so2->so_lock); - } - switch (so1->so_proto->pr_domain->dom_family) { - case PF_INET: - case PF_INET6: - NET_UNLOCK_SHARED(); - break; } } Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.319 in_pcb.c --- netinet/in_pcb.c 8 Jul 2025 00:47:41 -0000 1.319 +++ netinet/in_pcb.c 11 Jul 2025 22:26:13 -0000 @@ -589,11 +589,6 @@ in_pcbdetach(struct inpcb *inp) soassertlocked(so); so->so_pcb = NULL; - /* - * As long as the NET_LOCK() is the default lock for Internet - * sockets, do not release it to not introduce new sleeping - * points. - */ sofree(so, 1); if (inp->inp_route.ro_rt) { rtfree(inp->inp_route.ro_rt);