Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: route mtu atomic
To:
tech@openbsd.org
Date:
Fri, 3 Jan 2025 14:19:54 +0100

Download raw body.

Thread
anyone?

On Thu, Dec 26, 2024 at 12:11:50PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> With TCP unlocking we run into the problem that path MTU discovery
> happens in parallel.  To keep route mtu consistent, I want to make
> access to rt_mtu atomic.
> 
> The compare-and-swap function is used to detect if another thread
> is modifying the mtu.  In this case skip updating rt_mtu.
> 
> ok?
> 
> bluhm
> 
> Index: net/if_loop.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
> diff -u -p -r1.98 if_loop.c
> --- net/if_loop.c	29 Dec 2023 11:43:04 -0000	1.98
> +++ net/if_loop.c	25 Dec 2024 17:22:50 -0000
> @@ -277,8 +277,8 @@ looutput(struct ifnet *ifp, struct mbuf 
>  void
>  lortrequest(struct ifnet *ifp, int cmd, struct rtentry *rt)
>  {
> -	if (rt && rt->rt_mtu == 0)
> -		rt->rt_mtu = LOMTU;
> +	if (rt != NULL)
> +		atomic_cas_uint(&rt->rt_mtu, 0, LOMTU);
>  }
>  
>  /*
> Index: net/route.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
> diff -u -p -r1.437 route.c
> --- net/route.c	20 Sep 2024 02:00:46 -0000	1.437
> +++ net/route.c	25 Dec 2024 17:35:50 -0000
> @@ -557,8 +557,14 @@ rt_setgwroute(struct rtentry *rt, const 
>  	 * If the MTU of next hop is 0, this will reset the MTU of the
>  	 * route to run PMTUD again from scratch.
>  	 */
> -	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
> -		rt->rt_mtu = nhrt->rt_mtu;
> +	if (!ISSET(rt->rt_locks, RTV_MTU)) {
> +		u_int mtu, nhmtu;
> +
> +		mtu = atomic_load_int(&rt->rt_mtu);
> +		nhmtu = atomic_load_int(&nhrt->rt_mtu);
> +		if (mtu > nhmtu)
> +			atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
> +	}
>  
>  	/*
>  	 * To avoid reference counting problems when writing link-layer
> Index: net/route.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
> diff -u -p -r1.210 route.h
> --- net/route.h	31 Mar 2024 15:53:12 -0000	1.210
> +++ net/route.h	25 Dec 2024 17:24:16 -0000
> @@ -61,7 +61,7 @@ struct rt_kmetrics {
>  	u_int64_t	rmx_pksent;	/* packets sent using this route */
>  	int64_t		rmx_expire;	/* lifetime for route, e.g. redirect */
>  	u_int		rmx_locks;	/* Kernel must leave these values */
> -	u_int		rmx_mtu;	/* MTU for this path */
> +	u_int		rmx_mtu;	/* [a] MTU for this path */
>  };
>  #endif
>  
> Index: net/rtsock.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
> diff -u -p -r1.375 rtsock.c
> --- net/rtsock.c	12 Jul 2024 17:20:18 -0000	1.375
> +++ net/rtsock.c	25 Dec 2024 17:39:32 -0000
> @@ -1345,7 +1345,7 @@ route_cleargateway(struct rtentry *rt, v
>  
>  	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
>  	    !ISSET(rt->rt_locks, RTV_MTU))
> -		rt->rt_mtu = 0;
> +		atomic_store_int(&rt->rt_mtu, 0);
>  
>  	return (0);
>  }
> @@ -1393,7 +1393,7 @@ rtm_setmetrics(u_long which, const struc
>  	int64_t expire;
>  
>  	if (which & RTV_MTU)
> -		out->rmx_mtu = in->rmx_mtu;
> +		atomic_store_int(&out->rmx_mtu, in->rmx_mtu);
>  	if (which & RTV_EXPIRE) {
>  		expire = in->rmx_expire;
>  		if (expire != 0) {
> @@ -1421,7 +1421,7 @@ rtm_getmetrics(const struct rtentry *rt,
>  
>  	bzero(out, sizeof(*out));
>  	out->rmx_locks = in->rmx_locks;
> -	out->rmx_mtu = in->rmx_mtu;
> +	out->rmx_mtu = atomic_load_int(&in->rmx_mtu);
>  	out->rmx_expire = expire;
>  	out->rmx_pksent = in->rmx_pksent;
>  }
> Index: netinet/ip_icmp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
> diff -u -p -r1.197 ip_icmp.c
> --- netinet/ip_icmp.c	4 Dec 2024 22:24:11 -0000	1.197
> +++ netinet/ip_icmp.c	25 Dec 2024 17:46:46 -0000
> @@ -1008,6 +1008,7 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>  {
>  	struct rtentry *rt;
>  	struct ifnet *ifp;
> +	u_int rtmtu;
>  	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
>  
>  	rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
> @@ -1020,17 +1021,18 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>  		return;
>  	}
>  
> +	rtmtu = atomic_load_int(&rt->rt_mtu);
>  	if (mtu == 0) {
>  		int i = 0;
>  
>  		mtu = ntohs(icp->icmp_ip.ip_len);
>  		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
> -		if (mtu > rt->rt_mtu && rt->rt_mtu != 0)
> +		if (mtu > rtmtu && rtmtu != 0)
>  			mtu -= (icp->icmp_ip.ip_hl << 2);
>  
>  		/* If we still can't guess a value, try the route */
>  		if (mtu == 0) {
> -			mtu = rt->rt_mtu;
> +			mtu = rtmtu;
>  
>  			/* If no route mtu, default to the interface mtu */
>  
> @@ -1055,8 +1057,8 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>  	if ((rt->rt_locks & RTV_MTU) == 0) {
>  		if (mtu < 296 || mtu > ifp->if_mtu)
>  			rt->rt_locks |= RTV_MTU;
> -		else if (rt->rt_mtu > mtu || rt->rt_mtu == 0)
> -			rt->rt_mtu = mtu;
> +		else if (rtmtu > mtu || rtmtu == 0)
> +			atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
>  	}
>  
>  	if_put(ifp);
> @@ -1089,7 +1091,7 @@ icmp_mtudisc_timeout(struct rtentry *rt,
>  			    rtableid, NULL);
>  	} else {
>  		if ((rt->rt_locks & RTV_MTU) == 0)
> -			rt->rt_mtu = 0;
> +			atomic_store_int(&rt->rt_mtu, 0);
>  	}
>  
>  	if_put(ifp);
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.402 ip_input.c
> --- netinet/ip_input.c	21 Nov 2024 20:15:44 -0000	1.402
> +++ netinet/ip_input.c	25 Dec 2024 17:49:03 -0000
> @@ -1650,8 +1650,11 @@ ip_forward(struct mbuf *m, struct ifnet 
>  		type = ICMP_UNREACH;
>  		code = ICMP_UNREACH_NEEDFRAG;
>  		if (rt != NULL) {
> -			if (rt->rt_mtu) {
> -				destmtu = rt->rt_mtu;
> +			u_int rtmtu;
> +
> +			rtmtu = atomic_load_int(&rt->rt_mtu);
> +			if (rtmtu != 0) {
> +				destmtu = rtmtu;
>  			} else {
>  				struct ifnet *destifp;
>  
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.401 ip_output.c
> --- netinet/ip_output.c	2 Jul 2024 18:33:47 -0000	1.401
> +++ netinet/ip_output.c	25 Dec 2024 18:55:12 -0000
> @@ -211,7 +211,8 @@ reroute:
>  			error = EHOSTUNREACH;
>  			goto bad;
>  		}
> -		if ((mtu = ro->ro_rt->rt_mtu) == 0)
> +		mtu = atomic_load_int(&ro->ro_rt->rt_mtu);
> +		if (mtu == 0)
>  			mtu = ifp->if_mtu;
>  
>  		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
> @@ -470,9 +471,14 @@ sendit:
>  		 */
>  		if (rtisvalid(ro->ro_rt) &&
>  		    ISSET(ro->ro_rt->rt_flags, RTF_HOST) &&
> -		    !(ro->ro_rt->rt_locks & RTV_MTU) &&
> -		    (ro->ro_rt->rt_mtu > ifp->if_mtu)) {
> -			ro->ro_rt->rt_mtu = ifp->if_mtu;
> +		    !(ro->ro_rt->rt_locks & RTV_MTU)) {
> +			u_int rtmtu;
> +
> +			rtmtu = atomic_load_int(&ro->ro_rt->rt_mtu);
> +			if (rtmtu > ifp->if_mtu) {
> +				atomic_cas_uint(&ro->ro_rt->rt_mtu, rtmtu,
> +				    ifp->if_mtu);
> +			}
>  		}
>  		ipstat_inc(ips_cantfrag);
>  		goto bad;
> @@ -558,7 +564,7 @@ ip_output_ipsec_pmtu_update(struct tdb *
>  	DPRINTF("spi %08x mtu %d rt %p cloned %d",
>  	    ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
>  	if (rt != NULL) {
> -		rt->rt_mtu = tdb->tdb_mtu;
> +		atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
>  		if (ro != NULL && ro->ro_rt != NULL) {
>  			rtfree(ro->ro_rt);
>  			ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.412 tcp_input.c
> --- netinet/tcp_input.c	24 Dec 2024 19:16:53 -0000	1.412
> +++ netinet/tcp_input.c	25 Dec 2024 18:56:37 -0000
> @@ -2835,7 +2835,7 @@ tcp_mss(struct tcpcb *tp, int offer)
>  	 * if there's an mtu associated with the route and we support
>  	 * path MTU discovery for the underlying protocol family, use it.
>  	 */
> -	rtmtu = READ_ONCE(rt->rt_mtu);
> +	rtmtu = atomic_load_int(&rt->rt_mtu);
>  	if (rtmtu) {
>  		/*
>  		 * One may wish to lower MSS to take into account options,
> Index: netinet6/icmp6.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v
> diff -u -p -r1.255 icmp6.c
> --- netinet6/icmp6.c	12 Aug 2024 11:25:27 -0000	1.255
> +++ netinet6/icmp6.c	25 Dec 2024 19:00:50 -0000
> @@ -1016,16 +1016,20 @@ icmp6_mtudisc_update(struct ip6ctlparam 
>  	rt = icmp6_mtudisc_clone(&sin6, m->m_pkthdr.ph_rtableid, 0);
>  
>  	if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
> -	    !(rt->rt_locks & RTV_MTU) &&
> -	    (rt->rt_mtu > mtu || rt->rt_mtu == 0)) {
> -		struct ifnet *ifp;
> -
> -		ifp = if_get(rt->rt_ifidx);
> -		if (ifp != NULL && mtu < ifp->if_mtu) {
> -			icmp6stat_inc(icp6s_pmtuchg);
> -			rt->rt_mtu = mtu;
> +	    !(rt->rt_locks & RTV_MTU)) {
> +		u_int rtmtu;
> +
> +		rtmtu = atomic_load_int(&rt->rt_mtu);
> +		if (rtmtu > mtu || rtmtu == 0) {
> +			struct ifnet *ifp;
> +
> +			ifp = if_get(rt->rt_ifidx);
> +			if (ifp != NULL && mtu < ifp->if_mtu) {
> +				icmp6stat_inc(icp6s_pmtuchg);
> +				atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
> +			}
> +			if_put(ifp);
>  		}
> -		if_put(ifp);
>  	}
>  	rtfree(rt);
>  
> @@ -1848,7 +1852,7 @@ icmp6_mtudisc_timeout(struct rtentry *rt
>  		rtdeletemsg(rt, ifp, rtableid);
>  	} else {
>  		if (!(rt->rt_locks & RTV_MTU))
> -			rt->rt_mtu = 0;
> +			atomic_store_int(&rt->rt_mtu, 0);
>  	}
>  
>  	if_put(ifp);
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.124 ip6_forward.c
> --- netinet6/ip6_forward.c	19 Jul 2024 16:58:32 -0000	1.124
> +++ netinet6/ip6_forward.c	25 Dec 2024 19:02:20 -0000
> @@ -399,8 +399,11 @@ senderr:
>  	case EMSGSIZE:
>  		type = ICMP6_PACKET_TOO_BIG;
>  		if (rt != NULL) {
> -			if (rt->rt_mtu) {
> -				destmtu = rt->rt_mtu;
> +			u_int rtmtu;
> +
> +			rtmtu = atomic_load_int(&rt->rt_mtu);
> +			if (rtmtu != 0) {
> +				destmtu = rtmtu;
>  			} else {
>  				struct ifnet *destifp;
>  
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.292 ip6_output.c
> --- netinet6/ip6_output.c	4 Jul 2024 12:50:08 -0000	1.292
> +++ netinet6/ip6_output.c	25 Dec 2024 19:16:03 -0000
> @@ -156,8 +156,8 @@ struct idgen32_ctx ip6_id_ctx;
>   * The mbuf chain containing the packet will be freed.
>   * The mbuf opt, if present, will not be freed.
>   *
> - * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int.
> - * We use u_long to hold largest one, * which is rt_mtu.
> + * type of "mtu": rt_mtu is u_int, ifnet.ifr_mtu is int.
> + * We use u_long to hold largest one.  XXX should be u_int
>   */
>  int
>  ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route *ro,
> @@ -1027,11 +1027,11 @@ ip6_insertfraghdr(struct mbuf *m0, struc
>  int
>  ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup)
>  {
> -	u_int32_t mtu = 0;
> +	u_int mtu, rtmtu;
>  	int error = 0;
>  
>  	if (rt != NULL) {
> -		mtu = rt->rt_mtu;
> +		mtu = rtmtu = atomic_load_int(&rt->rt_mtu);
>  		if (mtu == 0)
>  			mtu = ifp->if_mtu;
>  		else if (mtu < IPV6_MMTU) {
> @@ -1048,7 +1048,7 @@ ip6_getpmtu(struct rtentry *rt, struct i
>  			 */
>  			mtu = ifp->if_mtu;
>  			if (!(rt->rt_locks & RTV_MTU))
> -				rt->rt_mtu = mtu;
> +				atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
>  		}
>  	} else {
>  		mtu = ifp->if_mtu;
> @@ -2812,7 +2812,7 @@ ip6_output_ipsec_pmtu_update(struct tdb 
>  	DPRINTF("spi %08x mtu %d rt %p cloned %d",
>  	    ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
>  	if (rt != NULL) {
> -		rt->rt_mtu = tdb->tdb_mtu;
> +		atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
>  		if (ro != NULL && ro->ro_rt != NULL) {
>  			rtfree(ro->ro_rt);
>  			ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
> Index: sys/atomic.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/atomic.h,v
> diff -u -p -r1.9 atomic.h
> --- sys/atomic.h	21 Mar 2022 05:45:52 -0000	1.9
> +++ sys/atomic.h	25 Dec 2024 17:38:09 -0000
> @@ -202,13 +202,13 @@ atomic_sub_long_nv(volatile unsigned lon
>   */
>  
>  static inline unsigned int
> -atomic_load_int(volatile unsigned int *p)
> +atomic_load_int(volatile const unsigned int *p)
>  {
>  	return *p;
>  }
>  
>  static inline unsigned long
> -atomic_load_long(volatile unsigned long *p)
> +atomic_load_long(volatile const unsigned long *p)
>  {
>  	return *p;
>  }