Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: socket filter read once
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 8 Nov 2024 17:14:46 +0100

Download raw body.

Thread
On Fri, Nov 08, 2024 at 06:42:26PM +0300, Vitaliy Makkoveev wrote:
> On Fri, Nov 08, 2024 at 01:50:20PM +0100, Alexander Bluhm wrote:
> > Hi,
> > 
> > The socket filt_...() funtions are called with shared netlock, but
> > without per socket lock.  This can be done as they are read only.
> > After unlocking, TCP will modify socket variables in parallel.  So
> > I would like to explicitly mark with READ_ONCE() where we have
> > unlocked access.
> > 
> > ok?
> > 
> 
> `so_oobmark' belongs to reception path, so I want to use `so_rcv'
> mutex(9) to protect it and already marked this in locking description.
> This mutex(9) is held, so you don't need to load it to local variable.

Like this?

Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.344 uipc_socket.c
--- kern/uipc_socket.c	31 Oct 2024 12:51:55 -0000	1.344
+++ kern/uipc_socket.c	8 Nov 2024 16:07:53 -0000
@@ -2433,6 +2433,8 @@ int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	u_int state = READ_ONCE(so->so_state);
+	u_int error = READ_ONCE(so->so_error);
 	int rv = 0;
 
 	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
@@ -2440,18 +2442,20 @@ filt_soread(struct knote *kn, long hint)
 		soassertlocked_readonly(so);
 
 	if (so->so_options & SO_ACCEPTCONN) {
+		short qlen = READ_ONCE(so->so_qlen);
+
 		if (so->so_rcv.sb_flags & SB_MTXLOCK)
 			soassertlocked_readonly(so);
 
-		kn->kn_data = so->so_qlen;
+		kn->kn_data = qlen;
 		rv = (kn->kn_data != 0);
 
 		if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
-			if (so->so_state & SS_ISDISCONNECTED) {
+			if (state & SS_ISDISCONNECTED) {
 				kn->kn_flags |= __EV_HUP;
 				rv = 1;
 			} else {
-				rv = soreadable(so);
+				rv = qlen || soreadable(so);
 			}
 		}
 
@@ -2467,12 +2471,12 @@ filt_soread(struct knote *kn, long hint)
 	if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		if (kn->kn_flags & __EV_POLL) {
-			if (so->so_state & SS_ISDISCONNECTED)
+			if (state & SS_ISDISCONNECTED)
 				kn->kn_flags |= __EV_HUP;
 		}
-		kn->kn_fflags = so->so_error;
+		kn->kn_fflags = error;
 		rv = 1;
-	} else if (so->so_error) {
+	} else if (error) {
 		rv = 1;
 	} else if (kn->kn_sfflags & NOTE_LOWAT) {
 		rv = (kn->kn_data >= kn->kn_sdata);
@@ -2495,6 +2499,8 @@ int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	u_int state = READ_ONCE(so->so_state);
+	u_int error = READ_ONCE(so->so_error);
 	int rv;
 
 	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
@@ -2505,14 +2511,14 @@ filt_sowrite(struct knote *kn, long hint
 	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		if (kn->kn_flags & __EV_POLL) {
-			if (so->so_state & SS_ISDISCONNECTED)
+			if (state & SS_ISDISCONNECTED)
 				kn->kn_flags |= __EV_HUP;
 		}
-		kn->kn_fflags = so->so_error;
+		kn->kn_fflags = error;
 		rv = 1;
-	} else if (so->so_error) {
+	} else if (error) {
 		rv = 1;
-	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
+	} else if (((state & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
 		rv = 0;
 	} else if (kn->kn_sfflags & NOTE_LOWAT) {
@@ -2548,7 +2554,9 @@ filt_soexcept(struct knote *kn, long hin
 	}
 
 	if (kn->kn_flags & __EV_POLL) {
-		if (so->so_state & SS_ISDISCONNECTED) {
+		u_int state = READ_ONCE(so->so_state);
+
+		if (state & SS_ISDISCONNECTED) {
 			kn->kn_flags |= __EV_HUP;
 			rv = 1;
 		}
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.407 tcp_input.c
--- netinet/tcp_input.c	26 Aug 2024 13:55:14 -0000	1.407
+++ netinet/tcp_input.c	8 Nov 2024 16:07:53 -0000
@@ -1896,10 +1896,12 @@ step6:
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
+			mtx_enter(&so->so_rcv.sb_mtx);
 			so->so_oobmark = so->so_rcv.sb_cc +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SS_RCVATMARK;
+			mtx_leave(&so->so_rcv.sb_mtx);
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}