From: Vitaliy Makkoveev Subject: Re: Please test: shared solock for all intet sockets within knote(9) routines To: Alexander Bluhm Cc: tech@openbsd.org Date: Mon, 29 Jan 2024 22:25:23 +0300 On Mon, Jan 29, 2024 at 03:57:35PM +0100, Alexander Bluhm wrote: > On Mon, Jan 29, 2024 at 04:20:02PM +0300, Vitaliy Makkoveev wrote: > > So, I want to commit soassertlocked() right now. sofilt_lock() diff > > could be pushed to the snaps, so we could test them together. > > Regress not finished, but here are the first findings. I think it > is regress/sys/net/pf_divert which requires complicated setup with > two machines. > > divert_packet runs with shared net lock, without rwlock socket lock, > but with inpcb mutex for receive socket buffer. I think soassertlocked > has to take pru_lock() into account. > > And for sorwakeup this is not sufficent, there I hope for your > sofilt_lock(). > > This is basically the same what my diff found a month ago. > > bluhm We should have no so{r,w}wakeup() calls outside pru_lock() or `so_lock' protection for shared netlock case. May be we have the only caller with shared netlock for socket instance, but this could be changed in the future. Any case this is very fragile. Updated diff. We have no rw_status() analog for mutex, so I introduced mtx_owned() macro. The 'pr_usrreqs' structure received new optional `pru_locked' member to get the status of pru_lock(). soassertlocked() was adjusted to use it. sorwakeup() call was moved under `inp_mtx' for both divert sockets and multicast routing. This diff also includes the sofilt_lock() hunks to make the test coverage wider. I want to commit hem separately, so I remove them before commit. The mutex man page is missing. I'll add it later if required. Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.315 diff -u -p -r1.315 uipc_socket.c --- sys/kern/uipc_socket.c 26 Jan 2024 18:24:23 -0000 1.315 +++ sys/kern/uipc_socket.c 29 Jan 2024 19:08:04 -0000 @@ -2137,13 +2137,45 @@ sohasoutofband(struct socket *so) knote_locked(&so->so_rcv.sb_klist, 0); } +void +sofilt_lock(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_LOCK_SHARED(); + rw_enter_write(&so->so_lock); + pru_lock(so); + break; + default: + rw_enter_write(&so->so_lock); + break; + } +} + +void +sofilt_unlock(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + pru_unlock(so); + rw_exit_write(&so->so_lock); + NET_UNLOCK_SHARED(); + break; + default: + rw_exit_write(&so->so_lock); + break; + } +} + int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; - solock(so); + sofilt_lock(so); switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) @@ -2161,12 +2193,12 @@ soo_kqfilter(struct file *fp, struct kno sb = &so->so_rcv; break; default: - sounlock(so); + sofilt_unlock(so); return (EINVAL); } klist_insert_locked(&sb->sb_klist, kn); - sounlock(so); + sofilt_unlock(so); return (0); } @@ -2311,9 +2343,9 @@ filt_somodify(struct kevent *kev, struct struct socket *so = kn->kn_fp->f_data; int rv; - solock(so); + sofilt_lock(so); rv = knote_modify(kev, kn); - sounlock(so); + sofilt_unlock(so); return (rv); } @@ -2324,9 +2356,9 @@ filt_soprocess(struct knote *kn, struct struct socket *so = kn->kn_fp->f_data; int rv; - solock(so); + sofilt_lock(so); rv = knote_process(kn, kev); - sounlock(so); + sofilt_unlock(so); return (rv); } @@ -2344,7 +2376,7 @@ klist_solock(void *arg) { struct socket *so = arg; - solock(so); + sofilt_lock(so); return (1); } @@ -2353,7 +2385,7 @@ klist_sounlock(void *arg, int ls) { struct socket *so = arg; - sounlock(so); + sofilt_unlock(so); } #ifdef DDB Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.140 diff -u -p -r1.140 uipc_socket2.c --- sys/kern/uipc_socket2.c 11 Jan 2024 14:15:11 -0000 1.140 +++ sys/kern/uipc_socket2.c 29 Jan 2024 19:08:04 -0000 @@ -444,7 +444,14 @@ soassertlocked(struct socket *so) switch (so->so_proto->pr_domain->dom_family) { case PF_INET: case PF_INET6: - NET_ASSERT_LOCKED(); + if (rw_status(&netlock) == RW_READ) { + NET_ASSERT_LOCKED(); + + if (splassert_ctl > 0 && pru_locked(so) == 0 && + rw_status(&so->so_lock) != RW_WRITE) + splassert_fail(0, RW_WRITE, __func__); + } else + NET_ASSERT_LOCKED_EXCLUSIVE(); break; default: rw_assert_wrlock(&so->so_lock); Index: sys/netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.92 diff -u -p -r1.92 ip_divert.c --- sys/netinet/ip_divert.c 16 Sep 2023 09:33:27 -0000 1.92 +++ sys/netinet/ip_divert.c 29 Jan 2024 19:08:04 -0000 @@ -67,6 +67,7 @@ const struct pr_usrreqs divert_usrreqs = .pru_detach = divert_detach, .pru_lock = divert_lock, .pru_unlock = divert_unlock, + .pru_locked = divert_locked, .pru_bind = divert_bind, .pru_shutdown = divert_shutdown, .pru_send = divert_send, @@ -247,8 +248,8 @@ divert_packet(struct mbuf *m, int dir, u divstat_inc(divs_fullsock); goto bad; } - mtx_leave(&inp->inp_mtx); sorwakeup(so); + mtx_leave(&inp->inp_mtx); in_pcbunref(inp); return; @@ -311,6 +312,14 @@ divert_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +divert_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/ip_divert.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.h,v retrieving revision 1.24 diff -u -p -r1.24 ip_divert.h --- sys/netinet/ip_divert.h 17 Oct 2022 14:49:02 -0000 1.24 +++ sys/netinet/ip_divert.h 29 Jan 2024 19:08:04 -0000 @@ -74,6 +74,7 @@ int divert_attach(struct socket *, int, int divert_detach(struct socket *); void divert_lock(struct socket *); void divert_unlock(struct socket *); +int divert_locked(struct socket *); int divert_bind(struct socket *, struct mbuf *, struct proc *); int divert_shutdown(struct socket *); int divert_send(struct socket *, struct mbuf *, struct mbuf *, Index: sys/netinet/ip_mroute.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_mroute.c,v retrieving revision 1.140 diff -u -p -r1.140 ip_mroute.c --- sys/netinet/ip_mroute.c 6 Dec 2023 09:27:17 -0000 1.140 +++ sys/netinet/ip_mroute.c 29 Jan 2024 19:08:04 -0000 @@ -1056,12 +1056,12 @@ socket_send(struct socket *so, struct mb mtx_enter(&inp->inp_mtx); ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL); + if (ret != 0) + sorwakeup(so); mtx_leave(&inp->inp_mtx); - if (ret != 0) { - sorwakeup(so); + if (ret != 0) return (0); - } } m_freem(mm); return (-1); Index: sys/netinet/ip_var.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.110 diff -u -p -r1.110 ip_var.h --- sys/netinet/ip_var.h 26 Nov 2023 22:08:10 -0000 1.110 +++ sys/netinet/ip_var.h 29 Jan 2024 19:08:04 -0000 @@ -260,6 +260,7 @@ int rip_attach(struct socket *, int, in int rip_detach(struct socket *); void rip_lock(struct socket *); void rip_unlock(struct socket *); +int rip_locked(struct socket *); int rip_bind(struct socket *, struct mbuf *, struct proc *); int rip_connect(struct socket *, struct mbuf *); int rip_disconnect(struct socket *); Index: sys/netinet/raw_ip.c =================================================================== RCS file: /cvs/src/sys/netinet/raw_ip.c,v retrieving revision 1.154 diff -u -p -r1.154 raw_ip.c --- sys/netinet/raw_ip.c 21 Jan 2024 01:17:20 -0000 1.154 +++ sys/netinet/raw_ip.c 29 Jan 2024 19:08:04 -0000 @@ -108,6 +108,7 @@ const struct pr_usrreqs rip_usrreqs = { .pru_detach = rip_detach, .pru_lock = rip_lock, .pru_unlock = rip_unlock, + .pru_locked = rip_locked, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_disconnect = rip_disconnect, @@ -522,6 +523,14 @@ rip_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +rip_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.315 diff -u -p -r1.315 udp_usrreq.c --- sys/netinet/udp_usrreq.c 21 Jan 2024 01:17:20 -0000 1.315 +++ sys/netinet/udp_usrreq.c 29 Jan 2024 19:08:04 -0000 @@ -127,6 +127,7 @@ const struct pr_usrreqs udp_usrreqs = { .pru_detach = udp_detach, .pru_lock = udp_lock, .pru_unlock = udp_unlock, + .pru_locked = udp_locked, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_disconnect = udp_disconnect, @@ -143,6 +144,7 @@ const struct pr_usrreqs udp6_usrreqs = { .pru_detach = udp_detach, .pru_lock = udp_lock, .pru_unlock = udp_unlock, + .pru_locked = udp_locked, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_disconnect = udp_disconnect, @@ -1154,6 +1156,14 @@ udp_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +udp_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/udp_var.h =================================================================== RCS file: /cvs/src/sys/netinet/udp_var.h,v retrieving revision 1.50 diff -u -p -r1.50 udp_var.h --- sys/netinet/udp_var.h 10 Jan 2024 16:44:30 -0000 1.50 +++ sys/netinet/udp_var.h 29 Jan 2024 19:08:04 -0000 @@ -147,6 +147,7 @@ int udp_attach(struct socket *, int, in int udp_detach(struct socket *); void udp_lock(struct socket *); void udp_unlock(struct socket *); +int udp_locked(struct socket *); int udp_bind(struct socket *, struct mbuf *, struct proc *); int udp_connect(struct socket *, struct mbuf *); int udp_disconnect(struct socket *); Index: sys/netinet6/ip6_divert.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v retrieving revision 1.91 diff -u -p -r1.91 ip6_divert.c --- sys/netinet6/ip6_divert.c 1 Jan 2024 18:52:09 -0000 1.91 +++ sys/netinet6/ip6_divert.c 29 Jan 2024 19:08:04 -0000 @@ -69,6 +69,7 @@ const struct pr_usrreqs divert6_usrreqs .pru_detach = divert_detach, .pru_lock = divert_lock, .pru_unlock = divert_unlock, + .pru_locked = divert_locked, .pru_bind = divert_bind, .pru_shutdown = divert_shutdown, .pru_send = divert6_send, @@ -254,8 +255,8 @@ divert6_packet(struct mbuf *m, int dir, div6stat_inc(div6s_fullsock); goto bad; } - mtx_leave(&inp->inp_mtx); sorwakeup(so); + mtx_leave(&inp->inp_mtx); in_pcbunref(inp); return; Index: sys/netinet6/ip6_mroute.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v retrieving revision 1.138 diff -u -p -r1.138 ip6_mroute.c --- sys/netinet6/ip6_mroute.c 6 Dec 2023 09:27:17 -0000 1.138 +++ sys/netinet6/ip6_mroute.c 29 Jan 2024 19:08:04 -0000 @@ -861,12 +861,12 @@ socket6_send(struct socket *so, struct m mtx_enter(&inp->inp_mtx); ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL); + if (ret != 0) + sorwakeup(so); mtx_leave(&inp->inp_mtx); - if (ret != 0) { - sorwakeup(so); + if (ret != 0) return 0; - } } m_freem(mm); return -1; Index: sys/netinet6/ip6_var.h =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_var.h,v retrieving revision 1.109 diff -u -p -r1.109 ip6_var.h --- sys/netinet6/ip6_var.h 3 Dec 2023 20:36:24 -0000 1.109 +++ sys/netinet6/ip6_var.h 29 Jan 2024 19:08:04 -0000 @@ -353,6 +353,7 @@ int rip6_attach(struct socket *, int, in int rip6_detach(struct socket *); void rip6_lock(struct socket *); void rip6_unlock(struct socket *); +int rip6_locked(struct socket *); int rip6_bind(struct socket *, struct mbuf *, struct proc *); int rip6_connect(struct socket *, struct mbuf *); int rip6_disconnect(struct socket *); Index: sys/netinet6/raw_ip6.c =================================================================== RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v retrieving revision 1.179 diff -u -p -r1.179 raw_ip6.c --- sys/netinet6/raw_ip6.c 21 Jan 2024 01:17:20 -0000 1.179 +++ sys/netinet6/raw_ip6.c 29 Jan 2024 19:08:04 -0000 @@ -110,6 +110,7 @@ const struct pr_usrreqs rip6_usrreqs = { .pru_detach = rip6_detach, .pru_lock = rip6_lock, .pru_unlock = rip6_unlock, + .pru_locked = rip6_locked, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_disconnect = rip6_disconnect, @@ -651,6 +652,14 @@ rip6_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +rip6_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/sys/mutex.h =================================================================== RCS file: /cvs/src/sys/sys/mutex.h,v retrieving revision 1.19 diff -u -p -r1.19 mutex.h --- sys/sys/mutex.h 1 Dec 2023 14:37:22 -0000 1.19 +++ sys/sys/mutex.h 29 Jan 2024 19:08:04 -0000 @@ -127,6 +127,9 @@ void mtx_leave(struct mutex *); #define mtx_init(m, ipl) mtx_init_flags(m, ipl, NULL, 0) +#define mtx_owned(mtx) \ + (((mtx)->mtx_owner == curcpu()) || panicstr || db_active) + #ifdef WITNESS void _mtx_init_flags(struct mutex *, int, const char *, int, Index: sys/sys/protosw.h =================================================================== RCS file: /cvs/src/sys/sys/protosw.h,v retrieving revision 1.64 diff -u -p -r1.64 protosw.h --- sys/sys/protosw.h 11 Jan 2024 14:15:12 -0000 1.64 +++ sys/sys/protosw.h 29 Jan 2024 19:08:04 -0000 @@ -69,6 +69,7 @@ struct pr_usrreqs { int (*pru_detach)(struct socket *); void (*pru_lock)(struct socket *); void (*pru_unlock)(struct socket *); + int (*pru_locked)(struct socket *so); int (*pru_bind)(struct socket *, struct mbuf *, struct proc *); int (*pru_listen)(struct socket *); int (*pru_connect)(struct socket *, struct mbuf *); @@ -294,6 +295,14 @@ pru_unlock(struct socket *so) { if (so->so_proto->pr_usrreqs->pru_unlock) (*so->so_proto->pr_usrreqs->pru_unlock)(so); +} + +static inline int +pru_locked(struct socket *so) +{ + if (so->so_proto->pr_usrreqs->pru_locked) + return (*so->so_proto->pr_usrreqs->pru_locked)(so); + return (0); } static inline int