Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
make tcp_mss() MP safe
To:
tech@openbsd.org
Date:
Sat, 21 Dec 2024 00:50:37 +0100

Download raw body.

Thread
Hi,

Make tcp_mss() MP safe so that it could be called with shared net
lock together with socket lock.

Document that inp_route is protected by socket lock.

Read tcp_mssdflt and tcp_do_rfc3390 atomically.  Note that tcp_mssdflt
is used in more places, so it still needs net lock to be written.

Address family must be AF_INET or AF_INET6.  Panic if not.  Makes
goto out a bit simpler.

rt->rt_mtu is read once, another thread might modify it.

Fix the signed vs. unsigned comparison with max() and min().

ok?

bluhm

Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.159 in_pcb.h
--- netinet/in_pcb.h	5 Nov 2024 10:49:23 -0000	1.159
+++ netinet/in_pcb.h	20 Dec 2024 21:38:31 -0000
@@ -140,7 +140,7 @@ struct inpcb {
 	u_int16_t inp_lport;		/* [t] local port */
 	struct	  socket *inp_socket;	/* [I] back pointer to socket */
 	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
-	struct    route inp_route;	/* cached route */
+	struct    route inp_route;	/* [s] cached route */
 	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
 	int	  inp_flags;		/* generic IP/datagram flags */
 	union {				/* Header prototype. */
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.410 tcp_input.c
--- netinet/tcp_input.c	20 Dec 2024 19:20:34 -0000	1.410
+++ netinet/tcp_input.c	20 Dec 2024 22:26:41 -0000
@@ -2804,17 +2804,13 @@ int
 tcp_mss(struct tcpcb *tp, int offer)
 {
 	struct rtentry *rt;
-	struct ifnet *ifp = NULL;
-	int mss, mssopt;
-	int iphlen;
-	struct inpcb *inp;
+	struct ifnet *ifp;
+	int mss, mssopt, mssdflt, iphlen, do_rfc3390;
+	u_int rtmtu;
 
-	inp = tp->t_inpcb;
-
-	mssopt = mss = tcp_mssdflt;
-
-	rt = in_pcbrtentry(inp);
+	mss = mssopt = mssdflt = atomic_load_int(&tcp_mssdflt);
 
+	rt = in_pcbrtentry(tp->t_inpcb);
 	if (rt == NULL)
 		goto out;
 
@@ -2823,29 +2819,29 @@ tcp_mss(struct tcpcb *tp, int offer)
 		goto out;
 
 	switch (tp->pf) {
+	case AF_INET:
+		iphlen = sizeof(struct ip);
+		break;
 #ifdef INET6
 	case AF_INET6:
 		iphlen = sizeof(struct ip6_hdr);
 		break;
 #endif
-	case AF_INET:
-		iphlen = sizeof(struct ip);
-		break;
 	default:
-		/* the family does not support path MTU discovery */
-		goto out;
+		unhandled_af(tp->pf);
 	}
 
 	/*
 	 * if there's an mtu associated with the route and we support
 	 * path MTU discovery for the underlying protocol family, use it.
 	 */
-	if (rt->rt_mtu) {
+	rtmtu = READ_ONCE(rt->rt_mtu);
+	if (rtmtu) {
 		/*
 		 * One may wish to lower MSS to take into account options,
 		 * especially security-related options.
 		 */
-		if (tp->pf == AF_INET6 && rt->rt_mtu < IPV6_MMTU) {
+		if (tp->pf == AF_INET6 && rtmtu < IPV6_MMTU) {
 			/*
 			 * RFC2460 section 5, last paragraph: if path MTU is
 			 * smaller than 1280, use 1280 as packet size and
@@ -2854,8 +2850,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 			mss = IPV6_MMTU - iphlen - sizeof(struct ip6_frag) -
 			    sizeof(struct tcphdr);
 		} else {
-			mss = rt->rt_mtu - iphlen -
-			    sizeof(struct tcphdr);
+			mss = rtmtu - iphlen - sizeof(struct tcphdr);
 		}
 	} else if (ifp->if_flags & IFF_LOOPBACK) {
 		mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
@@ -2876,10 +2871,10 @@ tcp_mss(struct tcpcb *tp, int offer)
 	/* Calculate the value that we offer in TCPOPT_MAXSEG */
 	if (offer != -1) {
 		mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
-		mssopt = max(tcp_mssdflt, mssopt);
+		mssopt = imax(mssopt, mssdflt);
 	}
- out:
 	if_put(ifp);
+ out:
 	/*
 	 * The current mss, t_maxseg, is initialized to the default value.
 	 * If we compute a smaller value, reduce the current mss.
@@ -2893,10 +2888,10 @@ tcp_mss(struct tcpcb *tp, int offer)
 	if (offer > 0)
 		tp->t_peermss = offer;
 	if (tp->t_peermss)
-		mss = min(mss, max(tp->t_peermss, 216));
+		mss = imin(mss, max(tp->t_peermss, 216));
 
 	/* sanity - at least max opt. space */
-	mss = max(mss, 64);
+	mss = imax(mss, 64);
 
 	/*
 	 * maxopd stores the maximum length of data AND options
@@ -2915,6 +2910,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 		mss -= TCPOLEN_SIGLEN;
 #endif
 
+	do_rfc3390 = atomic_load_int(&tcp_do_rfc3390);
 	if (offer == -1) {
 		/* mss changed due to Path MTU discovery */
 		tp->t_flags &= ~TF_PMTUD_PEND;
@@ -2929,10 +2925,10 @@ tcp_mss(struct tcpcb *tp, int offer)
 			tp->snd_cwnd = ulmax((tp->snd_cwnd / tp->t_maxseg) *
 			    mss, mss);
 		}
-	} else if (tcp_do_rfc3390 == 2) {
+	} else if (do_rfc3390 == 2) {
 		/* increase initial window  */
 		tp->snd_cwnd = ulmin(10 * mss, ulmax(2 * mss, 14600));
-	} else if (tcp_do_rfc3390) {
+	} else if (do_rfc3390) {
 		/* increase initial window  */
 		tp->snd_cwnd = ulmin(4 * mss, ulmax(2 * mss, 4380));
 	} else
@@ -3020,7 +3016,6 @@ tcp_mss_update(struct tcpcb *tp)
 		(void)sbreserve(so, &so->so_rcv, bufsize);
 	}
 	mtx_leave(&so->so_rcv.sb_mtx);
-
 }
 
 /*
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.178 tcp_var.h
--- netinet/tcp_var.h	13 May 2024 01:15:53 -0000	1.178
+++ netinet/tcp_var.h	20 Dec 2024 22:22:06 -0000
@@ -227,6 +227,7 @@ struct tcp_opt_info {
 
 /*
  * Locks used to protect global data and struct members:
+ *	a	atomic operations
  *	I	immutable after creation
  *	N	net lock
  *	S	syn_cache_mtx		tcp syn cache global mutex
@@ -679,14 +680,14 @@ extern	struct pool tcpcb_pool;
 extern	struct inpcbtable tcbtable, tcb6table;	/* queue of active tcpcb's */
 extern	int tcp_do_rfc1323;	/* enabled/disabled? */
 extern	int tcptv_keep_init;	/* [N] time to keep alive initial SYN packet */
-extern	int tcp_mssdflt;	/* default maximum segment size */
+extern	int tcp_mssdflt;	/* [N] default maximum segment size */
 extern	int tcp_rst_ppslim;	/* maximum outgoing RST packet per second */
 extern	int tcp_ack_on_push;	/* ACK immediately on PUSH */
 extern	int tcp_do_sack;	/* SACK enabled/disabled */
 extern	struct pool sackhl_pool;
 extern	int tcp_sackhole_limit;	/* max entries for tcp sack queues */
 extern	int tcp_do_ecn;		/* RFC3168 ECN enabled/disabled? */
-extern	int tcp_do_rfc3390;	/* RFC3390 Increasing TCP's Initial Window */
+extern	int tcp_do_rfc3390;	/* [a] RFC3390 Increasing TCP Initial Window */
 extern	int tcp_do_tso;		/* enable TSO for TCP output packets */
 
 extern	struct pool tcpqe_pool;