From: Vitaliy Makkoveev Subject: Re: socket filter read once To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 8 Nov 2024 20:35:02 +0300 > On 8 Nov 2024, at 19:14, Alexander Bluhm wrote: > > On Fri, Nov 08, 2024 at 06:42:26PM +0300, Vitaliy Makkoveev wrote: >> On Fri, Nov 08, 2024 at 01:50:20PM +0100, Alexander Bluhm wrote: >>> Hi, >>> >>> The socket filt_...() funtions are called with shared netlock, but >>> without per socket lock. This can be done as they are read only. >>> After unlocking, TCP will modify socket variables in parallel. So >>> I would like to explicitly mark with READ_ONCE() where we have >>> unlocked access. >>> >>> ok? >>> >> >> `so_oobmark' belongs to reception path, so I want to use `so_rcv' >> mutex(9) to protect it and already marked this in locking description. >> This mutex(9) is held, so you don't need to load it to local variable. > > Like this? > Yes. You modify it together with so_rcv, so corresponding lock is reasonable. We could move so_oobmark to the sockbuf, but it is useless for so_snd, so we save a little bit of space. Since you left tcp_input() exclusively locked, the last hunk is unnecessary, but anyway this diff is ok by me. > Index: kern/uipc_socket.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v > diff -u -p -r1.344 uipc_socket.c > --- kern/uipc_socket.c 31 Oct 2024 12:51:55 -0000 1.344 > +++ kern/uipc_socket.c 8 Nov 2024 16:07:53 -0000 > @@ -2433,6 +2433,8 @@ int > filt_soread(struct knote *kn, long hint) > { > struct socket *so = kn->kn_fp->f_data; > + u_int state = READ_ONCE(so->so_state); > + u_int error = READ_ONCE(so->so_error); > int rv = 0; > > MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); > @@ -2440,18 +2442,20 @@ filt_soread(struct knote *kn, long hint) > soassertlocked_readonly(so); > > if (so->so_options & SO_ACCEPTCONN) { > + short qlen = READ_ONCE(so->so_qlen); > + > if (so->so_rcv.sb_flags & SB_MTXLOCK) > soassertlocked_readonly(so); > > - kn->kn_data = so->so_qlen; > + kn->kn_data = qlen; > rv = (kn->kn_data != 0); > > if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) { > - if (so->so_state & SS_ISDISCONNECTED) { > + if (state & SS_ISDISCONNECTED) { > kn->kn_flags |= __EV_HUP; > rv = 1; > } else { > - rv = soreadable(so); > + rv = qlen || soreadable(so); > } > } > > @@ -2467,12 +2471,12 @@ filt_soread(struct knote *kn, long hint) > if (so->so_rcv.sb_state & SS_CANTRCVMORE) { > kn->kn_flags |= EV_EOF; > if (kn->kn_flags & __EV_POLL) { > - if (so->so_state & SS_ISDISCONNECTED) > + if (state & SS_ISDISCONNECTED) > kn->kn_flags |= __EV_HUP; > } > - kn->kn_fflags = so->so_error; > + kn->kn_fflags = error; > rv = 1; > - } else if (so->so_error) { > + } else if (error) { > rv = 1; > } else if (kn->kn_sfflags & NOTE_LOWAT) { > rv = (kn->kn_data >= kn->kn_sdata); > @@ -2495,6 +2499,8 @@ int > filt_sowrite(struct knote *kn, long hint) > { > struct socket *so = kn->kn_fp->f_data; > + u_int state = READ_ONCE(so->so_state); > + u_int error = READ_ONCE(so->so_error); > int rv; > > MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); > @@ -2505,14 +2511,14 @@ filt_sowrite(struct knote *kn, long hint > if (so->so_snd.sb_state & SS_CANTSENDMORE) { > kn->kn_flags |= EV_EOF; > if (kn->kn_flags & __EV_POLL) { > - if (so->so_state & SS_ISDISCONNECTED) > + if (state & SS_ISDISCONNECTED) > kn->kn_flags |= __EV_HUP; > } > - kn->kn_fflags = so->so_error; > + kn->kn_fflags = error; > rv = 1; > - } else if (so->so_error) { > + } else if (error) { > rv = 1; > - } else if (((so->so_state & SS_ISCONNECTED) == 0) && > + } else if (((state & SS_ISCONNECTED) == 0) && > (so->so_proto->pr_flags & PR_CONNREQUIRED)) { > rv = 0; > } else if (kn->kn_sfflags & NOTE_LOWAT) { > @@ -2548,7 +2554,9 @@ filt_soexcept(struct knote *kn, long hin > } > > if (kn->kn_flags & __EV_POLL) { > - if (so->so_state & SS_ISDISCONNECTED) { > + u_int state = READ_ONCE(so->so_state); > + > + if (state & SS_ISDISCONNECTED) { > kn->kn_flags |= __EV_HUP; > rv = 1; > } > Index: netinet/tcp_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > diff -u -p -r1.407 tcp_input.c > --- netinet/tcp_input.c 26 Aug 2024 13:55:14 -0000 1.407 > +++ netinet/tcp_input.c 8 Nov 2024 16:07:53 -0000 > @@ -1896,10 +1896,12 @@ step6: > */ > if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { > tp->rcv_up = th->th_seq + th->th_urp; > + mtx_enter(&so->so_rcv.sb_mtx); > so->so_oobmark = so->so_rcv.sb_cc + > (tp->rcv_up - tp->rcv_nxt) - 1; > if (so->so_oobmark == 0) > so->so_rcv.sb_state |= SS_RCVATMARK; > + mtx_leave(&so->so_rcv.sb_mtx); > sohasoutofband(so); > tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); > }