Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: make tcp_mss() MP safe
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Mon, 23 Dec 2024 01:38:00 +0300

Download raw body.

Thread
On Sat, Dec 21, 2024 at 12:50:37AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> Make tcp_mss() MP safe so that it could be called with shared net
> lock together with socket lock.
> 
> Document that inp_route is protected by socket lock.
> 
> Read tcp_mssdflt and tcp_do_rfc3390 atomically.  Note that tcp_mssdflt
> is used in more places, so it still needs net lock to be written.
> 
> Address family must be AF_INET or AF_INET6.  Panic if not.  Makes
> goto out a bit simpler.
> 
> rt->rt_mtu is read once, another thread might modify it.
> 
> Fix the signed vs. unsigned comparison with max() and min().
> 
> ok?
> 

Hi,

> @@ -679,14 +680,14 @@ extern	struct pool tcpcb_pool;
>  extern	struct inpcbtable tcbtable, tcb6table;	/* queue of active tcpcb's */
>  extern	int tcp_do_rfc1323;	/* enabled/disabled? */
>  extern	int tcptv_keep_init;	/* [N] time to keep alive initial SYN packet */
> -extern	int tcp_mssdflt;	/* default maximum segment size */
> +extern	int tcp_mssdflt;	/* [N] default maximum segment size */
>  extern	int tcp_rst_ppslim;	/* maximum outgoing RST packet per second */

Please, put locks description to the tcp_usrreq.c where variables are
defined. Also `tcp_mssdflt' should be  [a], not [N]. The rest is ok mvs@.

> bluhm
> 
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> diff -u -p -r1.159 in_pcb.h
> --- netinet/in_pcb.h	5 Nov 2024 10:49:23 -0000	1.159
> +++ netinet/in_pcb.h	20 Dec 2024 21:38:31 -0000
> @@ -140,7 +140,7 @@ struct inpcb {
>  	u_int16_t inp_lport;		/* [t] local port */
>  	struct	  socket *inp_socket;	/* [I] back pointer to socket */
>  	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
> -	struct    route inp_route;	/* cached route */
> +	struct    route inp_route;	/* [s] cached route */
>  	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
>  	int	  inp_flags;		/* generic IP/datagram flags */
>  	union {				/* Header prototype. */
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.410 tcp_input.c
> --- netinet/tcp_input.c	20 Dec 2024 19:20:34 -0000	1.410
> +++ netinet/tcp_input.c	20 Dec 2024 22:26:41 -0000
> @@ -2804,17 +2804,13 @@ int
>  tcp_mss(struct tcpcb *tp, int offer)
>  {
>  	struct rtentry *rt;
> -	struct ifnet *ifp = NULL;
> -	int mss, mssopt;
> -	int iphlen;
> -	struct inpcb *inp;
> +	struct ifnet *ifp;
> +	int mss, mssopt, mssdflt, iphlen, do_rfc3390;
> +	u_int rtmtu;
>  
> -	inp = tp->t_inpcb;
> -
> -	mssopt = mss = tcp_mssdflt;
> -
> -	rt = in_pcbrtentry(inp);
> +	mss = mssopt = mssdflt = atomic_load_int(&tcp_mssdflt);
>  
> +	rt = in_pcbrtentry(tp->t_inpcb);
>  	if (rt == NULL)
>  		goto out;
>  
> @@ -2823,29 +2819,29 @@ tcp_mss(struct tcpcb *tp, int offer)
>  		goto out;
>  
>  	switch (tp->pf) {
> +	case AF_INET:
> +		iphlen = sizeof(struct ip);
> +		break;
>  #ifdef INET6
>  	case AF_INET6:
>  		iphlen = sizeof(struct ip6_hdr);
>  		break;
>  #endif
> -	case AF_INET:
> -		iphlen = sizeof(struct ip);
> -		break;
>  	default:
> -		/* the family does not support path MTU discovery */
> -		goto out;
> +		unhandled_af(tp->pf);
>  	}
>  
>  	/*
>  	 * if there's an mtu associated with the route and we support
>  	 * path MTU discovery for the underlying protocol family, use it.
>  	 */
> -	if (rt->rt_mtu) {
> +	rtmtu = READ_ONCE(rt->rt_mtu);
> +	if (rtmtu) {
>  		/*
>  		 * One may wish to lower MSS to take into account options,
>  		 * especially security-related options.
>  		 */
> -		if (tp->pf == AF_INET6 && rt->rt_mtu < IPV6_MMTU) {
> +		if (tp->pf == AF_INET6 && rtmtu < IPV6_MMTU) {
>  			/*
>  			 * RFC2460 section 5, last paragraph: if path MTU is
>  			 * smaller than 1280, use 1280 as packet size and
> @@ -2854,8 +2850,7 @@ tcp_mss(struct tcpcb *tp, int offer)
>  			mss = IPV6_MMTU - iphlen - sizeof(struct ip6_frag) -
>  			    sizeof(struct tcphdr);
>  		} else {
> -			mss = rt->rt_mtu - iphlen -
> -			    sizeof(struct tcphdr);
> +			mss = rtmtu - iphlen - sizeof(struct tcphdr);
>  		}
>  	} else if (ifp->if_flags & IFF_LOOPBACK) {
>  		mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
> @@ -2876,10 +2871,10 @@ tcp_mss(struct tcpcb *tp, int offer)
>  	/* Calculate the value that we offer in TCPOPT_MAXSEG */
>  	if (offer != -1) {
>  		mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
> -		mssopt = max(tcp_mssdflt, mssopt);
> +		mssopt = imax(mssopt, mssdflt);
>  	}
> - out:
>  	if_put(ifp);
> + out:
>  	/*
>  	 * The current mss, t_maxseg, is initialized to the default value.
>  	 * If we compute a smaller value, reduce the current mss.
> @@ -2893,10 +2888,10 @@ tcp_mss(struct tcpcb *tp, int offer)
>  	if (offer > 0)
>  		tp->t_peermss = offer;
>  	if (tp->t_peermss)
> -		mss = min(mss, max(tp->t_peermss, 216));
> +		mss = imin(mss, max(tp->t_peermss, 216));
>  
>  	/* sanity - at least max opt. space */
> -	mss = max(mss, 64);
> +	mss = imax(mss, 64);
>  
>  	/*
>  	 * maxopd stores the maximum length of data AND options
> @@ -2915,6 +2910,7 @@ tcp_mss(struct tcpcb *tp, int offer)
>  		mss -= TCPOLEN_SIGLEN;
>  #endif
>  
> +	do_rfc3390 = atomic_load_int(&tcp_do_rfc3390);
>  	if (offer == -1) {
>  		/* mss changed due to Path MTU discovery */
>  		tp->t_flags &= ~TF_PMTUD_PEND;
> @@ -2929,10 +2925,10 @@ tcp_mss(struct tcpcb *tp, int offer)
>  			tp->snd_cwnd = ulmax((tp->snd_cwnd / tp->t_maxseg) *
>  			    mss, mss);
>  		}
> -	} else if (tcp_do_rfc3390 == 2) {
> +	} else if (do_rfc3390 == 2) {
>  		/* increase initial window  */
>  		tp->snd_cwnd = ulmin(10 * mss, ulmax(2 * mss, 14600));
> -	} else if (tcp_do_rfc3390) {
> +	} else if (do_rfc3390) {
>  		/* increase initial window  */
>  		tp->snd_cwnd = ulmin(4 * mss, ulmax(2 * mss, 4380));
>  	} else
> @@ -3020,7 +3016,6 @@ tcp_mss_update(struct tcpcb *tp)
>  		(void)sbreserve(so, &so->so_rcv, bufsize);
>  	}
>  	mtx_leave(&so->so_rcv.sb_mtx);
> -
>  }
>  
>  /*
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.178 tcp_var.h
> --- netinet/tcp_var.h	13 May 2024 01:15:53 -0000	1.178
> +++ netinet/tcp_var.h	20 Dec 2024 22:22:06 -0000
> @@ -227,6 +227,7 @@ struct tcp_opt_info {
>  
>  /*
>   * Locks used to protect global data and struct members:
> + *	a	atomic operations
>   *	I	immutable after creation
>   *	N	net lock
>   *	S	syn_cache_mtx		tcp syn cache global mutex
> @@ -679,14 +680,14 @@ extern	struct pool tcpcb_pool;
>  extern	struct inpcbtable tcbtable, tcb6table;	/* queue of active tcpcb's */
>  extern	int tcp_do_rfc1323;	/* enabled/disabled? */
>  extern	int tcptv_keep_init;	/* [N] time to keep alive initial SYN packet */
> -extern	int tcp_mssdflt;	/* default maximum segment size */
> +extern	int tcp_mssdflt;	/* [N] default maximum segment size */
>  extern	int tcp_rst_ppslim;	/* maximum outgoing RST packet per second */
>  extern	int tcp_ack_on_push;	/* ACK immediately on PUSH */
>  extern	int tcp_do_sack;	/* SACK enabled/disabled */
>  extern	struct pool sackhl_pool;
>  extern	int tcp_sackhole_limit;	/* max entries for tcp sack queues */
>  extern	int tcp_do_ecn;		/* RFC3168 ECN enabled/disabled? */
> -extern	int tcp_do_rfc3390;	/* RFC3390 Increasing TCP's Initial Window */
> +extern	int tcp_do_rfc3390;	/* [a] RFC3390 Increasing TCP Initial Window */
>  extern	int tcp_do_tso;		/* enable TSO for TCP output packets */
>  
>  extern	struct pool tcpqe_pool;
>