From: Vitaliy Makkoveev Subject: Re: Please test: shared solock for all inet sockets within knote(9) routines To: Alexander Bluhm Cc: tech@openbsd.org Date: Tue, 30 Jan 2024 17:39:43 +0300 On Tue, Jan 30, 2024 at 12:52:03PM +0300, Vitaliy Makkoveev wrote: > On Mon, Jan 29, 2024 at 10:25:23PM +0300, Vitaliy Makkoveev wrote: > > On Mon, Jan 29, 2024 at 03:57:35PM +0100, Alexander Bluhm wrote: > > > On Mon, Jan 29, 2024 at 04:20:02PM +0300, Vitaliy Makkoveev wrote: > > > > So, I want to commit soassertlocked() right now. sofilt_lock() diff > > > > could be pushed to the snaps, so we could test them together. > > > > > > Regress not finished, but here are the first findings. I think it > > > is regress/sys/net/pf_divert which requires complicated setup with > > > two machines. > > > > > > divert_packet runs with shared net lock, without rwlock socket lock, > > > but with inpcb mutex for receive socket buffer. I think soassertlocked > > > has to take pru_lock() into account. > > > > > > And for sorwakeup this is not sufficient, there I hope for your > > > sofilt_lock(). > > > > > > This is basically the same what my diff found a month ago. > > > > > > bluhm > > Shared netlock is sufficient to call so{r,w}wakeup(). The following sowakeup() modifies `sb_flags' and knote(9) stuff. Unfortunately, we can't call so{r,w}wakeup() with mutex(9) because sowakeup() also calls pgsigio() which grabs kernel lock. However, `so*_filtops' callbacks only perform read-only access to the socket stuff, so it is enough to hold shared netlock only, but the klist stuff needs to be protected. This diff uses some chunks of my socket buffer standalone locking work. It introduces `sb_mtx' mutex(9) to protect sockbuf stuff. This time `sb_mtx' is used to protect `sb_flags' and `sb_klist'. Now we have soassertlocked_readonly() and soassertlocked(). The first one is happy if only shared netlock is held, meanwhile the second wants `so_lock' or pru_lock() to be held together with shared netlock.
To keep soassertlocked*() assertions soft, we need to know mutex(9) state, so new mtx_owned() macro was introduces. Also, the new optional (*pru_locked)() handler brings the state of pru_lock(). Please test it, but keep in mind, soassertlocked_readonly() could be required in some more places. Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.315 diff -u -p -r1.315 uipc_socket.c --- sys/kern/uipc_socket.c 26 Jan 2024 18:24:23 -0000 1.315 +++ sys/kern/uipc_socket.c 30 Jan 2024 14:24:45 -0000 @@ -72,26 +72,20 @@ int filt_soread(struct knote *kn, long h void filt_sowdetach(struct knote *kn); int filt_sowrite(struct knote *kn, long hint); int filt_soexcept(struct knote *kn, long hint); -int filt_solisten(struct knote *kn, long hint); -int filt_somodify(struct kevent *kev, struct knote *kn); -int filt_soprocess(struct knote *kn, struct kevent *kev); -const struct filterops solisten_filtops = { - .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, - .f_attach = NULL, - .f_detach = filt_sordetach, - .f_event = filt_solisten, - .f_modify = filt_somodify, - .f_process = filt_soprocess, -}; +int filt_snd_somodify(struct kevent *kev, struct knote *kn); +int filt_snd_soprocess(struct knote *kn, struct kevent *kev); + +int filt_rcv_somodify(struct kevent *kev, struct knote *kn); +int filt_rcv_soprocess(struct knote *kn, struct kevent *kev); const struct filterops soread_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, - .f_modify = filt_somodify, - .f_process = filt_soprocess, + .f_modify = filt_rcv_somodify, + .f_process = filt_rcv_soprocess, }; const struct filterops sowrite_filtops = { @@ -99,8 +93,8 @@ const struct filterops sowrite_filtops = .f_attach = NULL, .f_detach = filt_sowdetach, .f_event = filt_sowrite, - .f_modify = filt_somodify, - .f_process = filt_soprocess, + .f_modify = 
filt_snd_somodify, + .f_process = filt_snd_soprocess, }; const struct filterops soexcept_filtops = { @@ -108,18 +102,28 @@ const struct filterops soexcept_filtops .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soexcept, - .f_modify = filt_somodify, - .f_process = filt_soprocess, + .f_modify = filt_rcv_somodify, + .f_process = filt_rcv_soprocess, +}; + +void klist_snd_soassertlk(void *); +int klist_snd_solock(void *); +void klist_snd_sounlock(void *, int); + +const struct klistops socket_snd_klistops = { + .klo_assertlk = klist_snd_soassertlk, + .klo_lock = klist_snd_solock, + .klo_unlock = klist_snd_sounlock, }; -void klist_soassertlk(void *); -int klist_solock(void *); -void klist_sounlock(void *, int); - -const struct klistops socket_klistops = { - .klo_assertlk = klist_soassertlk, - .klo_lock = klist_solock, - .klo_unlock = klist_sounlock, +void klist_rcv_soassertlk(void *); +int klist_rcv_solock(void *); +void klist_rcv_sounlock(void *, int); + +const struct klistops socket_rcv_klistops = { + .klo_assertlk = klist_rcv_soassertlk, + .klo_lock = klist_rcv_solock, + .klo_unlock = klist_rcv_sounlock, }; #ifndef SOMINCONN @@ -158,8 +162,10 @@ soalloc(const struct domain *dp, int wai return (NULL); rw_init_flags(&so->so_lock, dp->dom_name, RWL_DUPOK); refcnt_init(&so->so_refcnt); - klist_init(&so->so_rcv.sb_klist, &socket_klistops, so); - klist_init(&so->so_snd.sb_klist, &socket_klistops, so); + mtx_init(&so->so_rcv.sb_mtx, IPL_MPFLOOR); + mtx_init(&so->so_snd.sb_mtx, IPL_MPFLOOR); + klist_init(&so->so_rcv.sb_klist, &socket_rcv_klistops, so); + klist_init(&so->so_snd.sb_klist, &socket_snd_klistops, so); sigio_init(&so->so_sigio); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); @@ -1757,7 +1763,7 @@ somove(struct socket *so, int wait) void sorwakeup(struct socket *so) { - soassertlocked(so); + soassertlocked_readonly(so); #ifdef SOCKET_SPLICE if (so->so_rcv.sb_flags & SB_SPLICE) { @@ -1785,7 +1791,7 @@ sorwakeup(struct socket *so) void sowwakeup(struct 
socket *so) { - soassertlocked(so); + soassertlocked_readonly(so); #ifdef SOCKET_SPLICE if (so->so_snd.sb_flags & SB_SPLICE) @@ -2137,19 +2143,54 @@ sohasoutofband(struct socket *so) knote_locked(&so->so_rcv.sb_klist, 0); } +void +sofilt_lock(struct socket *so, struct sockbuf *sb) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_LOCK_SHARED(); + break; + default: + rw_enter_write(&so->so_lock); + break; + } + + mtx_enter(&sb->sb_mtx); +} + +void +sofilt_unlock(struct socket *so, struct sockbuf *sb) +{ + mtx_leave(&sb->sb_mtx); + + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_UNLOCK_SHARED(); + break; + default: + rw_exit_write(&so->so_lock); + break; + } +} + +static inline void +sofilt_assert_locked(struct socket *so, struct sockbuf *sb) +{ + MUTEX_ASSERT_LOCKED(&sb->sb_mtx); + soassertlocked_readonly(so); +} + int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; - solock(so); switch (kn->kn_filter) { case EVFILT_READ: - if (so->so_options & SO_ACCEPTCONN) - kn->kn_fop = &solisten_filtops; - else - kn->kn_fop = &soread_filtops; + kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: @@ -2161,12 +2202,12 @@ soo_kqfilter(struct file *fp, struct kno sb = &so->so_rcv; break; default: - sounlock(so); return (EINVAL); } + mtx_enter(&sb->sb_mtx); klist_insert_locked(&sb->sb_klist, kn); - sounlock(so); + mtx_leave(&sb->sb_mtx); return (0); } @@ -2185,7 +2226,23 @@ filt_soread(struct knote *kn, long hint) struct socket *so = kn->kn_fp->f_data; int rv = 0; - soassertlocked(so); + sofilt_assert_locked(so, &so->so_rcv); + + if (so->so_options & SO_ACCEPTCONN) { + kn->kn_data = so->so_qlen; + rv = (kn->kn_data != 0); + + if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) { + if (so->so_state & SS_ISDISCONNECTED) { + kn->kn_flags |= __EV_HUP; + rv = 1; + } else { + rv = soreadable(so); + } + } + + return rv; + } 
kn->kn_data = so->so_rcv.sb_cc; #ifdef SOCKET_SPLICE @@ -2226,7 +2283,7 @@ filt_sowrite(struct knote *kn, long hint struct socket *so = kn->kn_fp->f_data; int rv; - soassertlocked(so); + sofilt_assert_locked(so, &so->so_snd); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_snd.sb_state & SS_CANTSENDMORE) { @@ -2257,7 +2314,7 @@ filt_soexcept(struct knote *kn, long hin struct socket *so = kn->kn_fp->f_data; int rv = 0; - soassertlocked(so); + sofilt_assert_locked(so, &so->so_rcv); #ifdef SOCKET_SPLICE if (isspliced(so)) { @@ -2283,77 +2340,105 @@ filt_soexcept(struct knote *kn, long hin } int -filt_solisten(struct knote *kn, long hint) +filt_snd_somodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int active; + int rv; - soassertlocked(so); + sofilt_lock(so, &so->so_snd); + rv = knote_modify(kev, kn); + sofilt_unlock(so, &so->so_snd); - kn->kn_data = so->so_qlen; - active = (kn->kn_data != 0); + return (rv); +} - if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) { - if (so->so_state & SS_ISDISCONNECTED) { - kn->kn_flags |= __EV_HUP; - active = 1; - } else { - active = soreadable(so); - } - } +int +filt_snd_soprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int rv; + + sofilt_lock(so, &so->so_snd); + rv = knote_process(kn, kev); + sofilt_unlock(so, &so->so_snd); - return (active); + return (rv); } int -filt_somodify(struct kevent *kev, struct knote *kn) +filt_rcv_somodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; int rv; - solock(so); + sofilt_lock(so, &so->so_rcv); rv = knote_modify(kev, kn); - sounlock(so); + sofilt_unlock(so, &so->so_rcv); return (rv); } int -filt_soprocess(struct knote *kn, struct kevent *kev) +filt_rcv_soprocess(struct knote *kn, struct kevent *kev) { struct socket *so = kn->kn_fp->f_data; int rv; - solock(so); + sofilt_lock(so, &so->so_rcv); rv = knote_process(kn, kev); - sounlock(so); + sofilt_unlock(so, &so->so_rcv); return 
(rv); } void -klist_soassertlk(void *arg) +klist_snd_soassertlk(void *arg) { struct socket *so = arg; - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); } int -klist_solock(void *arg) +klist_snd_solock(void *arg) { struct socket *so = arg; - solock(so); + mtx_enter(&so->so_snd.sb_mtx); return (1); } void -klist_sounlock(void *arg, int ls) +klist_snd_sounlock(void *arg, int ls) { struct socket *so = arg; - sounlock(so); + mtx_leave(&so->so_snd.sb_mtx); +} + +void +klist_rcv_soassertlk(void *arg) +{ + struct socket *so = arg; + + MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); +} + +int +klist_rcv_solock(void *arg) +{ + struct socket *so = arg; + + mtx_enter(&so->so_rcv.sb_mtx); + return (1); +} + +void +klist_rcv_sounlock(void *arg, int ls) +{ + struct socket *so = arg; + + mtx_leave(&so->so_rcv.sb_mtx); } #ifdef DDB Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.140 diff -u -p -r1.140 uipc_socket2.c --- sys/kern/uipc_socket2.c 11 Jan 2024 14:15:11 -0000 1.140 +++ sys/kern/uipc_socket2.c 30 Jan 2024 14:24:45 -0000 @@ -439,7 +439,7 @@ sounlock_shared(struct socket *so) } void -soassertlocked(struct socket *so) +soassertlocked_readonly(struct socket *so) { switch (so->so_proto->pr_domain->dom_family) { case PF_INET: @@ -452,6 +452,27 @@ soassertlocked(struct socket *so) } } +void +soassertlocked(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + if (rw_status(&netlock) == RW_READ) { + NET_ASSERT_LOCKED(); + + if (splassert_ctl > 0 && pru_locked(so) == 0 && + rw_status(&so->so_lock) != RW_WRITE) + splassert_fail(0, RW_WRITE, __func__); + } else + NET_ASSERT_LOCKED_EXCLUSIVE(); + break; + default: + rw_assert_wrlock(&so->so_lock); + break; + } +} + int sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, uint64_t nsecs) @@ -489,46 +510,62 @@ sbwait(struct socket *so, 
struct sockbuf soassertlocked(so); + mtx_enter(&sb->sb_mtx); sb->sb_flags |= SB_WAIT; + mtx_leave(&sb->sb_mtx); + return sosleep_nsec(so, &sb->sb_cc, prio, "netio", sb->sb_timeo_nsecs); } int sblock(struct socket *so, struct sockbuf *sb, int flags) { - int error, prio = PSOCK; + int error = 0, prio = PSOCK; soassertlocked(so); + mtx_enter(&sb->sb_mtx); if ((sb->sb_flags & SB_LOCK) == 0) { sb->sb_flags |= SB_LOCK; - return (0); + goto out; + } + if ((flags & SBL_WAIT) == 0) { + error = EWOULDBLOCK; + goto out; } - if ((flags & SBL_WAIT) == 0) - return (EWOULDBLOCK); if (!(flags & SBL_NOINTR || sb->sb_flags & SB_NOINTR)) prio |= PCATCH; while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; + mtx_leave(&sb->sb_mtx); error = sosleep_nsec(so, &sb->sb_flags, prio, "netlck", INFSLP); if (error) return (error); + mtx_enter(&sb->sb_mtx); } sb->sb_flags |= SB_LOCK; +out: + mtx_leave(&sb->sb_mtx); + return (0); } void sbunlock(struct socket *so, struct sockbuf *sb) { - soassertlocked(so); + int dowakeup = 0; + mtx_enter(&sb->sb_mtx); sb->sb_flags &= ~SB_LOCK; if (sb->sb_flags & SB_WANT) { sb->sb_flags &= ~SB_WANT; - wakeup(&sb->sb_flags); + dowakeup = 1; } + mtx_leave(&sb->sb_mtx); + + if (dowakeup) + wakeup(&sb->sb_flags); } /* @@ -539,15 +576,24 @@ sbunlock(struct socket *so, struct sockb void sowakeup(struct socket *so, struct sockbuf *sb) { - soassertlocked(so); + int dowakeup = 0, dopgsigio = 0; + mtx_enter(&sb->sb_mtx); if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; - wakeup(&sb->sb_cc); + dowakeup = 1; } if (sb->sb_flags & SB_ASYNC) - pgsigio(&so->so_sigio, SIGIO, 0); + dopgsigio = 1; + knote_locked(&sb->sb_klist, 0); + mtx_leave(&sb->sb_mtx); + + if (dowakeup) + wakeup(&sb->sb_cc); + + if (dopgsigio) + pgsigio(&so->so_sigio, SIGIO, 0); } /* Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.216 diff -u -p -r1.216 uipc_syscalls.c --- 
sys/kern/uipc_syscalls.c 3 Jan 2024 11:07:04 -0000 1.216 +++ sys/kern/uipc_syscalls.c 30 Jan 2024 14:24:45 -0000 @@ -326,7 +326,9 @@ doaccept(struct proc *p, int sock, struc : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); /* connection has been removed from the listen queue */ + mtx_enter(&head->so_rcv.sb_mtx); knote_locked(&head->so_rcv.sb_klist, 0); + mtx_leave(&head->so_rcv.sb_mtx); if (persocket) sounlock(head); Index: sys/netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.92 diff -u -p -r1.92 ip_divert.c --- sys/netinet/ip_divert.c 16 Sep 2023 09:33:27 -0000 1.92 +++ sys/netinet/ip_divert.c 30 Jan 2024 14:24:45 -0000 @@ -67,6 +67,7 @@ const struct pr_usrreqs divert_usrreqs = .pru_detach = divert_detach, .pru_lock = divert_lock, .pru_unlock = divert_unlock, + .pru_locked = divert_locked, .pru_bind = divert_bind, .pru_shutdown = divert_shutdown, .pru_send = divert_send, @@ -311,6 +312,14 @@ divert_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +divert_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/ip_divert.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.h,v retrieving revision 1.24 diff -u -p -r1.24 ip_divert.h --- sys/netinet/ip_divert.h 17 Oct 2022 14:49:02 -0000 1.24 +++ sys/netinet/ip_divert.h 30 Jan 2024 14:24:45 -0000 @@ -74,6 +74,7 @@ int divert_attach(struct socket *, int, int divert_detach(struct socket *); void divert_lock(struct socket *); void divert_unlock(struct socket *); +int divert_locked(struct socket *); int divert_bind(struct socket *, struct mbuf *, struct proc *); int divert_shutdown(struct socket *); int divert_send(struct socket *, struct mbuf *, struct mbuf *, Index: sys/netinet/ip_var.h =================================================================== RCS 
file: /cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.110 diff -u -p -r1.110 ip_var.h --- sys/netinet/ip_var.h 26 Nov 2023 22:08:10 -0000 1.110 +++ sys/netinet/ip_var.h 30 Jan 2024 14:24:45 -0000 @@ -260,6 +260,7 @@ int rip_attach(struct socket *, int, in int rip_detach(struct socket *); void rip_lock(struct socket *); void rip_unlock(struct socket *); +int rip_locked(struct socket *); int rip_bind(struct socket *, struct mbuf *, struct proc *); int rip_connect(struct socket *, struct mbuf *); int rip_disconnect(struct socket *); Index: sys/netinet/raw_ip.c =================================================================== RCS file: /cvs/src/sys/netinet/raw_ip.c,v retrieving revision 1.154 diff -u -p -r1.154 raw_ip.c --- sys/netinet/raw_ip.c 21 Jan 2024 01:17:20 -0000 1.154 +++ sys/netinet/raw_ip.c 30 Jan 2024 14:24:45 -0000 @@ -108,6 +108,7 @@ const struct pr_usrreqs rip_usrreqs = { .pru_detach = rip_detach, .pru_lock = rip_lock, .pru_unlock = rip_unlock, + .pru_locked = rip_locked, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_disconnect = rip_disconnect, @@ -522,6 +523,14 @@ rip_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +rip_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.316 diff -u -p -r1.316 udp_usrreq.c --- sys/netinet/udp_usrreq.c 28 Jan 2024 20:34:25 -0000 1.316 +++ sys/netinet/udp_usrreq.c 30 Jan 2024 14:24:45 -0000 @@ -127,6 +127,7 @@ const struct pr_usrreqs udp_usrreqs = { .pru_detach = udp_detach, .pru_lock = udp_lock, .pru_unlock = udp_unlock, + .pru_locked = udp_locked, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_disconnect = udp_disconnect, @@ -143,6 +144,7 @@ const struct pr_usrreqs udp6_usrreqs = { .pru_detach = udp_detach, .pru_lock = udp_lock, 
.pru_unlock = udp_unlock, + .pru_locked = udp_locked, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_disconnect = udp_disconnect, @@ -1154,6 +1156,14 @@ udp_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +udp_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/netinet/udp_var.h =================================================================== RCS file: /cvs/src/sys/netinet/udp_var.h,v retrieving revision 1.50 diff -u -p -r1.50 udp_var.h --- sys/netinet/udp_var.h 10 Jan 2024 16:44:30 -0000 1.50 +++ sys/netinet/udp_var.h 30 Jan 2024 14:24:45 -0000 @@ -147,6 +147,7 @@ int udp_attach(struct socket *, int, in int udp_detach(struct socket *); void udp_lock(struct socket *); void udp_unlock(struct socket *); +int udp_locked(struct socket *); int udp_bind(struct socket *, struct mbuf *, struct proc *); int udp_connect(struct socket *, struct mbuf *); int udp_disconnect(struct socket *); Index: sys/netinet6/ip6_mroute.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v retrieving revision 1.138 diff -u -p -r1.138 ip6_mroute.c --- sys/netinet6/ip6_mroute.c 6 Dec 2023 09:27:17 -0000 1.138 +++ sys/netinet6/ip6_mroute.c 30 Jan 2024 14:24:45 -0000 @@ -861,12 +861,12 @@ socket6_send(struct socket *so, struct m mtx_enter(&inp->inp_mtx); ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL); + if (ret != 0) + sorwakeup(so); mtx_leave(&inp->inp_mtx); - if (ret != 0) { - sorwakeup(so); + if (ret != 0) return 0; - } } m_freem(mm); return -1; Index: sys/netinet6/ip6_var.h =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_var.h,v retrieving revision 1.109 diff -u -p -r1.109 ip6_var.h --- sys/netinet6/ip6_var.h 3 Dec 2023 20:36:24 -0000 1.109 +++ sys/netinet6/ip6_var.h 30 Jan 2024 14:24:45 -0000 @@ -353,6 +353,7 @@ int rip6_attach(struct socket *, int, 
in int rip6_detach(struct socket *); void rip6_lock(struct socket *); void rip6_unlock(struct socket *); +int rip6_locked(struct socket *); int rip6_bind(struct socket *, struct mbuf *, struct proc *); int rip6_connect(struct socket *, struct mbuf *); int rip6_disconnect(struct socket *); Index: sys/netinet6/raw_ip6.c =================================================================== RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v retrieving revision 1.179 diff -u -p -r1.179 raw_ip6.c --- sys/netinet6/raw_ip6.c 21 Jan 2024 01:17:20 -0000 1.179 +++ sys/netinet6/raw_ip6.c 30 Jan 2024 14:24:45 -0000 @@ -110,6 +110,7 @@ const struct pr_usrreqs rip6_usrreqs = { .pru_detach = rip6_detach, .pru_lock = rip6_lock, .pru_unlock = rip6_unlock, + .pru_locked = rip6_locked, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_disconnect = rip6_disconnect, @@ -651,6 +652,14 @@ rip6_unlock(struct socket *so) NET_ASSERT_LOCKED(); mtx_leave(&inp->inp_mtx); +} + +int +rip6_locked(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + return mtx_owned(&inp->inp_mtx); } int Index: sys/sys/mutex.h =================================================================== RCS file: /cvs/src/sys/sys/mutex.h,v retrieving revision 1.19 diff -u -p -r1.19 mutex.h --- sys/sys/mutex.h 1 Dec 2023 14:37:22 -0000 1.19 +++ sys/sys/mutex.h 30 Jan 2024 14:24:46 -0000 @@ -127,6 +127,9 @@ void mtx_leave(struct mutex *); #define mtx_init(m, ipl) mtx_init_flags(m, ipl, NULL, 0) +#define mtx_owned(mtx) \ + (((mtx)->mtx_owner == curcpu()) || panicstr || db_active) + #ifdef WITNESS void _mtx_init_flags(struct mutex *, int, const char *, int, Index: sys/sys/protosw.h =================================================================== RCS file: /cvs/src/sys/sys/protosw.h,v retrieving revision 1.64 diff -u -p -r1.64 protosw.h --- sys/sys/protosw.h 11 Jan 2024 14:15:12 -0000 1.64 +++ sys/sys/protosw.h 30 Jan 2024 14:24:46 -0000 @@ -69,6 +69,7 @@ struct pr_usrreqs { int (*pru_detach)(struct socket *); void 
(*pru_lock)(struct socket *); void (*pru_unlock)(struct socket *); + int (*pru_locked)(struct socket *so); int (*pru_bind)(struct socket *, struct mbuf *, struct proc *); int (*pru_listen)(struct socket *); int (*pru_connect)(struct socket *, struct mbuf *); @@ -294,6 +295,14 @@ pru_unlock(struct socket *so) { if (so->so_proto->pr_usrreqs->pru_unlock) (*so->so_proto->pr_usrreqs->pru_unlock)(so); +} + +static inline int +pru_locked(struct socket *so) +{ + if (so->so_proto->pr_usrreqs->pru_locked) + return (*so->so_proto->pr_usrreqs->pru_locked)(so); + return (0); } static inline int Index: sys/sys/socketvar.h =================================================================== RCS file: /cvs/src/sys/sys/socketvar.h,v retrieving revision 1.121 diff -u -p -r1.121 socketvar.h --- sys/sys/socketvar.h 11 Jan 2024 14:15:12 -0000 1.121 +++ sys/sys/socketvar.h 30 Jan 2024 14:24:46 -0000 @@ -40,6 +40,7 @@ #include /* for struct sigio_ref */ #include #include +#include #include #include @@ -105,6 +106,7 @@ struct socket { * Variables for socket buffering. */ struct sockbuf { + struct mutex sb_mtx; /* The following fields are all zeroed on flush. */ #define sb_startzero sb_cc u_long sb_cc; /* actual chars in buffer */ @@ -174,6 +176,7 @@ struct socket { #include void soassertlocked(struct socket *); +void soassertlocked_readonly(struct socket *); static inline void soref(struct socket *so) @@ -211,10 +214,12 @@ sb_notify(struct socket *so, struct sock * still be negative (cc > hiwat or mbcnt > mbmax). Should detect * overflow and return 0. 
*/ + static inline long sbspace(struct socket *so, struct sockbuf *sb) { - soassertlocked(so); + soassertlocked_readonly(so); + return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); } @@ -230,7 +235,7 @@ sbspace(struct socket *so, struct sockbu static inline int soreadable(struct socket *so) { - soassertlocked(so); + soassertlocked_readonly(so); if (isspliced(so)) return 0; return (so->so_rcv.sb_state & SS_CANTRCVMORE) || so->so_qlen || @@ -241,7 +246,7 @@ soreadable(struct socket *so) static inline int sowriteable(struct socket *so) { - soassertlocked(so); + soassertlocked_readonly(so); return ((sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat && ((so->so_state & SS_ISCONNECTED) || (so->so_proto->pr_flags & PR_CONNREQUIRED)==0)) ||