Download raw body.
socket filter read once
> On 8 Nov 2024, at 19:14, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> On Fri, Nov 08, 2024 at 06:42:26PM +0300, Vitaliy Makkoveev wrote:
>> On Fri, Nov 08, 2024 at 01:50:20PM +0100, Alexander Bluhm wrote:
>>> Hi,
>>>
>>> The socket filt_...() funtions are called with shared netlock, but
>>> without per socket lock. This can be done as they are read only.
>>> After unlocking, TCP will modify socket variables in parallel. So
>>> I would like to explicitly mark with READ_ONCE() where we have
>>> unlocked access.
>>>
>>> ok?
>>>
>>
>> `so_oobmark' belongs to reception path, so I want to use `so_rcv'
>> mutex(9) to protect it and already marked this in locking description.
>> This mutex(9) is held, so you don't need to load it to local variable.
>
> Like this?
>
Yes. You modify it together with so_rcv, so corresponding lock is
reasonable. We could move so_oobmark to the sockbuf, but it is useless
for so_snd, so we save a little bit of space.
Since you left tcp_input() exclusively locked, the last hunk is
unnecessary, but anyway this diff is ok by me.
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> diff -u -p -r1.344 uipc_socket.c
> --- kern/uipc_socket.c 31 Oct 2024 12:51:55 -0000 1.344
> +++ kern/uipc_socket.c 8 Nov 2024 16:07:53 -0000
> @@ -2433,6 +2433,8 @@ int
> filt_soread(struct knote *kn, long hint)
> {
> struct socket *so = kn->kn_fp->f_data;
> + u_int state = READ_ONCE(so->so_state);
> + u_int error = READ_ONCE(so->so_error);
> int rv = 0;
>
> MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
> @@ -2440,18 +2442,20 @@ filt_soread(struct knote *kn, long hint)
> soassertlocked_readonly(so);
>
> if (so->so_options & SO_ACCEPTCONN) {
> + short qlen = READ_ONCE(so->so_qlen);
> +
> if (so->so_rcv.sb_flags & SB_MTXLOCK)
> soassertlocked_readonly(so);
>
> - kn->kn_data = so->so_qlen;
> + kn->kn_data = qlen;
> rv = (kn->kn_data != 0);
>
> if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
> - if (so->so_state & SS_ISDISCONNECTED) {
> + if (state & SS_ISDISCONNECTED) {
> kn->kn_flags |= __EV_HUP;
> rv = 1;
> } else {
> - rv = soreadable(so);
> + rv = qlen || soreadable(so);
> }
> }
>
> @@ -2467,12 +2471,12 @@ filt_soread(struct knote *kn, long hint)
> if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
> kn->kn_flags |= EV_EOF;
> if (kn->kn_flags & __EV_POLL) {
> - if (so->so_state & SS_ISDISCONNECTED)
> + if (state & SS_ISDISCONNECTED)
> kn->kn_flags |= __EV_HUP;
> }
> - kn->kn_fflags = so->so_error;
> + kn->kn_fflags = error;
> rv = 1;
> - } else if (so->so_error) {
> + } else if (error) {
> rv = 1;
> } else if (kn->kn_sfflags & NOTE_LOWAT) {
> rv = (kn->kn_data >= kn->kn_sdata);
> @@ -2495,6 +2499,8 @@ int
> filt_sowrite(struct knote *kn, long hint)
> {
> struct socket *so = kn->kn_fp->f_data;
> + u_int state = READ_ONCE(so->so_state);
> + u_int error = READ_ONCE(so->so_error);
> int rv;
>
> MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
> @@ -2505,14 +2511,14 @@ filt_sowrite(struct knote *kn, long hint
> if (so->so_snd.sb_state & SS_CANTSENDMORE) {
> kn->kn_flags |= EV_EOF;
> if (kn->kn_flags & __EV_POLL) {
> - if (so->so_state & SS_ISDISCONNECTED)
> + if (state & SS_ISDISCONNECTED)
> kn->kn_flags |= __EV_HUP;
> }
> - kn->kn_fflags = so->so_error;
> + kn->kn_fflags = error;
> rv = 1;
> - } else if (so->so_error) {
> + } else if (error) {
> rv = 1;
> - } else if (((so->so_state & SS_ISCONNECTED) == 0) &&
> + } else if (((state & SS_ISCONNECTED) == 0) &&
> (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
> rv = 0;
> } else if (kn->kn_sfflags & NOTE_LOWAT) {
> @@ -2548,7 +2554,9 @@ filt_soexcept(struct knote *kn, long hin
> }
>
> if (kn->kn_flags & __EV_POLL) {
> - if (so->so_state & SS_ISDISCONNECTED) {
> + u_int state = READ_ONCE(so->so_state);
> +
> + if (state & SS_ISDISCONNECTED) {
> kn->kn_flags |= __EV_HUP;
> rv = 1;
> }
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.407 tcp_input.c
> --- netinet/tcp_input.c 26 Aug 2024 13:55:14 -0000 1.407
> +++ netinet/tcp_input.c 8 Nov 2024 16:07:53 -0000
> @@ -1896,10 +1896,12 @@ step6:
> */
> if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
> tp->rcv_up = th->th_seq + th->th_urp;
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_oobmark = so->so_rcv.sb_cc +
> (tp->rcv_up - tp->rcv_nxt) - 1;
> if (so->so_oobmark == 0)
> so->so_rcv.sb_state |= SS_RCVATMARK;
> + mtx_leave(&so->so_rcv.sb_mtx);
> sohasoutofband(so);
> tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
> }
socket filter read once