From: Alexander Bluhm Subject: Re: socket filter read once To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Fri, 8 Nov 2024 17:14:46 +0100 On Fri, Nov 08, 2024 at 06:42:26PM +0300, Vitaliy Makkoveev wrote: > On Fri, Nov 08, 2024 at 01:50:20PM +0100, Alexander Bluhm wrote: > > Hi, > > > > The socket filt_...() funtions are called with shared netlock, but > > without per socket lock. This can be done as they are read only. > > After unlocking, TCP will modify socket variables in parallel. So > > I would like to explicitly mark with READ_ONCE() where we have > > unlocked access. > > > > ok? > > > > `so_oobmark' belongs to reception path, so I want to use `so_rcv' > mutex(9) to protect it and already marked this in locking description. > This mutex(9) is held, so you don't need to load it to local variable. Like this? Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.344 uipc_socket.c --- kern/uipc_socket.c 31 Oct 2024 12:51:55 -0000 1.344 +++ kern/uipc_socket.c 8 Nov 2024 16:07:53 -0000 @@ -2433,6 +2433,8 @@ int filt_soread(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; + u_int state = READ_ONCE(so->so_state); + u_int error = READ_ONCE(so->so_error); int rv = 0; MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); @@ -2440,18 +2442,20 @@ filt_soread(struct knote *kn, long hint) soassertlocked_readonly(so); if (so->so_options & SO_ACCEPTCONN) { + short qlen = READ_ONCE(so->so_qlen); + if (so->so_rcv.sb_flags & SB_MTXLOCK) soassertlocked_readonly(so); - kn->kn_data = so->so_qlen; + kn->kn_data = qlen; rv = (kn->kn_data != 0); if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) { - if (so->so_state & SS_ISDISCONNECTED) { + if (state & SS_ISDISCONNECTED) { kn->kn_flags |= __EV_HUP; rv = 1; } else { - rv = soreadable(so); + rv = qlen || soreadable(so); } } @@ -2467,12 +2471,12 @@ filt_soread(struct knote *kn, long hint) if (so->so_rcv.sb_state & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { - if (so->so_state & SS_ISDISCONNECTED) + if (state & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } - kn->kn_fflags = so->so_error; + kn->kn_fflags = error; rv = 1; - } else if (so->so_error) { + } else if (error) { rv = 1; } else if (kn->kn_sfflags & NOTE_LOWAT) { rv = (kn->kn_data >= kn->kn_sdata); @@ -2495,6 +2499,8 @@ int filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; + u_int state = READ_ONCE(so->so_state); + u_int error = READ_ONCE(so->so_error); int rv; MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); @@ -2505,14 +2511,14 @@ filt_sowrite(struct knote *kn, long hint if (so->so_snd.sb_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { - if (so->so_state & SS_ISDISCONNECTED) + if (state & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } - kn->kn_fflags = so->so_error; + kn->kn_fflags = error; rv = 1; - } else if (so->so_error) { + } else if (error) { rv = 1; - } else if (((so->so_state & SS_ISCONNECTED) == 0) && + } else if (((state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { rv = 0; } else if (kn->kn_sfflags & NOTE_LOWAT) { @@ -2548,7 +2554,9 @@ filt_soexcept(struct knote *kn, long hin } if (kn->kn_flags & __EV_POLL) { - if (so->so_state & SS_ISDISCONNECTED) { + u_int state = READ_ONCE(so->so_state); + + if (state & SS_ISDISCONNECTED) { kn->kn_flags |= __EV_HUP; rv = 1; } Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.407 tcp_input.c --- netinet/tcp_input.c 26 Aug 2024 13:55:14 -0000 1.407 +++ netinet/tcp_input.c 8 Nov 2024 16:07:53 -0000 @@ -1896,10 +1896,12 @@ step6: */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; + mtx_enter(&so->so_rcv.sb_mtx); so->so_oobmark = so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SS_RCVATMARK; + mtx_leave(&so->so_rcv.sb_mtx); sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); }