Download raw body.
route mtu atomic
ok mvs@
> On 3 Jan 2025, at 16:19, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> anyone?
>
> On Thu, Dec 26, 2024 at 12:11:50PM +0100, Alexander Bluhm wrote:
>> Hi,
>>
>> With TCP unlocking we run into the problem that path MTU discovery
>> happens in parallel. To keep route mtu consistent, I want to make
>> access to rt_mtu atomic.
>>
>> The compare-and-swap function is used to detect if another thread
>> is modifying the mtu. In this case skip updating rt_mtu.
>>
>> ok?
>>
>> bluhm
>>
>> Index: net/if_loop.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
>> diff -u -p -r1.98 if_loop.c
>> --- net/if_loop.c 29 Dec 2023 11:43:04 -0000 1.98
>> +++ net/if_loop.c 25 Dec 2024 17:22:50 -0000
>> @@ -277,8 +277,8 @@ looutput(struct ifnet *ifp, struct mbuf
>> void
>> lortrequest(struct ifnet *ifp, int cmd, struct rtentry *rt)
>> {
>> - if (rt && rt->rt_mtu == 0)
>> - rt->rt_mtu = LOMTU;
>> + if (rt != NULL)
>> + atomic_cas_uint(&rt->rt_mtu, 0, LOMTU);
>> }
>>
>> /*
>> Index: net/route.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
>> diff -u -p -r1.437 route.c
>> --- net/route.c 20 Sep 2024 02:00:46 -0000 1.437
>> +++ net/route.c 25 Dec 2024 17:35:50 -0000
>> @@ -557,8 +557,14 @@ rt_setgwroute(struct rtentry *rt, const
>> * If the MTU of next hop is 0, this will reset the MTU of the
>> * route to run PMTUD again from scratch.
>> */
>> - if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
>> - rt->rt_mtu = nhrt->rt_mtu;
>> + if (!ISSET(rt->rt_locks, RTV_MTU)) {
>> + u_int mtu, nhmtu;
>> +
>> + mtu = atomic_load_int(&rt->rt_mtu);
>> + nhmtu = atomic_load_int(&nhrt->rt_mtu);
>> + if (mtu > nhmtu)
>> + atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
>> + }
>>
>> /*
>> * To avoid reference counting problems when writing link-layer
>> Index: net/route.h
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
>> diff -u -p -r1.210 route.h
>> --- net/route.h 31 Mar 2024 15:53:12 -0000 1.210
>> +++ net/route.h 25 Dec 2024 17:24:16 -0000
>> @@ -61,7 +61,7 @@ struct rt_kmetrics {
>> u_int64_t rmx_pksent; /* packets sent using this route */
>> int64_t rmx_expire; /* lifetime for route, e.g. redirect */
>> u_int rmx_locks; /* Kernel must leave these values */
>> - u_int rmx_mtu; /* MTU for this path */
>> + u_int rmx_mtu; /* [a] MTU for this path */
>> };
>> #endif
>>
>> Index: net/rtsock.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
>> diff -u -p -r1.375 rtsock.c
>> --- net/rtsock.c 12 Jul 2024 17:20:18 -0000 1.375
>> +++ net/rtsock.c 25 Dec 2024 17:39:32 -0000
>> @@ -1345,7 +1345,7 @@ route_cleargateway(struct rtentry *rt, v
>>
>> if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
>> !ISSET(rt->rt_locks, RTV_MTU))
>> - rt->rt_mtu = 0;
>> + atomic_store_int(&rt->rt_mtu, 0);
>>
>> return (0);
>> }
>> @@ -1393,7 +1393,7 @@ rtm_setmetrics(u_long which, const struc
>> int64_t expire;
>>
>> if (which & RTV_MTU)
>> - out->rmx_mtu = in->rmx_mtu;
>> + atomic_store_int(&out->rmx_mtu, in->rmx_mtu);
>> if (which & RTV_EXPIRE) {
>> expire = in->rmx_expire;
>> if (expire != 0) {
>> @@ -1421,7 +1421,7 @@ rtm_getmetrics(const struct rtentry *rt,
>>
>> bzero(out, sizeof(*out));
>> out->rmx_locks = in->rmx_locks;
>> - out->rmx_mtu = in->rmx_mtu;
>> + out->rmx_mtu = atomic_load_int(&in->rmx_mtu);
>> out->rmx_expire = expire;
>> out->rmx_pksent = in->rmx_pksent;
>> }
>> Index: netinet/ip_icmp.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
>> diff -u -p -r1.197 ip_icmp.c
>> --- netinet/ip_icmp.c 4 Dec 2024 22:24:11 -0000 1.197
>> +++ netinet/ip_icmp.c 25 Dec 2024 17:46:46 -0000
>> @@ -1008,6 +1008,7 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>> {
>> struct rtentry *rt;
>> struct ifnet *ifp;
>> + u_int rtmtu;
>> u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
>>
>> rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
>> @@ -1020,17 +1021,18 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>> return;
>> }
>>
>> + rtmtu = atomic_load_int(&rt->rt_mtu);
>> if (mtu == 0) {
>> int i = 0;
>>
>> mtu = ntohs(icp->icmp_ip.ip_len);
>> /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
>> - if (mtu > rt->rt_mtu && rt->rt_mtu != 0)
>> + if (mtu > rtmtu && rtmtu != 0)
>> mtu -= (icp->icmp_ip.ip_hl << 2);
>>
>> /* If we still can't guess a value, try the route */
>> if (mtu == 0) {
>> - mtu = rt->rt_mtu;
>> + mtu = rtmtu;
>>
>> /* If no route mtu, default to the interface mtu */
>>
>> @@ -1055,8 +1057,8 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>> if ((rt->rt_locks & RTV_MTU) == 0) {
>> if (mtu < 296 || mtu > ifp->if_mtu)
>> rt->rt_locks |= RTV_MTU;
>> - else if (rt->rt_mtu > mtu || rt->rt_mtu == 0)
>> - rt->rt_mtu = mtu;
>> + else if (rtmtu > mtu || rtmtu == 0)
>> + atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
>> }
>>
>> if_put(ifp);
>> @@ -1089,7 +1091,7 @@ icmp_mtudisc_timeout(struct rtentry *rt,
>> rtableid, NULL);
>> } else {
>> if ((rt->rt_locks & RTV_MTU) == 0)
>> - rt->rt_mtu = 0;
>> + atomic_store_int(&rt->rt_mtu, 0);
>> }
>>
>> if_put(ifp);
>> Index: netinet/ip_input.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
>> diff -u -p -r1.402 ip_input.c
>> --- netinet/ip_input.c 21 Nov 2024 20:15:44 -0000 1.402
>> +++ netinet/ip_input.c 25 Dec 2024 17:49:03 -0000
>> @@ -1650,8 +1650,11 @@ ip_forward(struct mbuf *m, struct ifnet
>> type = ICMP_UNREACH;
>> code = ICMP_UNREACH_NEEDFRAG;
>> if (rt != NULL) {
>> - if (rt->rt_mtu) {
>> - destmtu = rt->rt_mtu;
>> + u_int rtmtu;
>> +
>> + rtmtu = atomic_load_int(&rt->rt_mtu);
>> + if (rtmtu != 0) {
>> + destmtu = rtmtu;
>> } else {
>> struct ifnet *destifp;
>>
>> Index: netinet/ip_output.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
>> diff -u -p -r1.401 ip_output.c
>> --- netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401
>> +++ netinet/ip_output.c 25 Dec 2024 18:55:12 -0000
>> @@ -211,7 +211,8 @@ reroute:
>> error = EHOSTUNREACH;
>> goto bad;
>> }
>> - if ((mtu = ro->ro_rt->rt_mtu) == 0)
>> + mtu = atomic_load_int(&ro->ro_rt->rt_mtu);
>> + if (mtu == 0)
>> mtu = ifp->if_mtu;
>>
>> if (ro->ro_rt->rt_flags & RTF_GATEWAY)
>> @@ -470,9 +471,14 @@ sendit:
>> */
>> if (rtisvalid(ro->ro_rt) &&
>> ISSET(ro->ro_rt->rt_flags, RTF_HOST) &&
>> - !(ro->ro_rt->rt_locks & RTV_MTU) &&
>> - (ro->ro_rt->rt_mtu > ifp->if_mtu)) {
>> - ro->ro_rt->rt_mtu = ifp->if_mtu;
>> + !(ro->ro_rt->rt_locks & RTV_MTU)) {
>> + u_int rtmtu;
>> +
>> + rtmtu = atomic_load_int(&ro->ro_rt->rt_mtu);
>> + if (rtmtu > ifp->if_mtu) {
>> + atomic_cas_uint(&ro->ro_rt->rt_mtu, rtmtu,
>> + ifp->if_mtu);
>> + }
>> }
>> ipstat_inc(ips_cantfrag);
>> goto bad;
>> @@ -558,7 +564,7 @@ ip_output_ipsec_pmtu_update(struct tdb *
>> DPRINTF("spi %08x mtu %d rt %p cloned %d",
>> ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
>> if (rt != NULL) {
>> - rt->rt_mtu = tdb->tdb_mtu;
>> + atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
>> if (ro != NULL && ro->ro_rt != NULL) {
>> rtfree(ro->ro_rt);
>> ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
>> Index: netinet/tcp_input.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
>> diff -u -p -r1.412 tcp_input.c
>> --- netinet/tcp_input.c 24 Dec 2024 19:16:53 -0000 1.412
>> +++ netinet/tcp_input.c 25 Dec 2024 18:56:37 -0000
>> @@ -2835,7 +2835,7 @@ tcp_mss(struct tcpcb *tp, int offer)
>> * if there's an mtu associated with the route and we support
>> * path MTU discovery for the underlying protocol family, use it.
>> */
>> - rtmtu = READ_ONCE(rt->rt_mtu);
>> + rtmtu = atomic_load_int(&rt->rt_mtu);
>> if (rtmtu) {
>> /*
>> * One may wish to lower MSS to take into account options,
>> Index: netinet6/icmp6.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v
>> diff -u -p -r1.255 icmp6.c
>> --- netinet6/icmp6.c 12 Aug 2024 11:25:27 -0000 1.255
>> +++ netinet6/icmp6.c 25 Dec 2024 19:00:50 -0000
>> @@ -1016,16 +1016,20 @@ icmp6_mtudisc_update(struct ip6ctlparam
>> rt = icmp6_mtudisc_clone(&sin6, m->m_pkthdr.ph_rtableid, 0);
>>
>> if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
>> - !(rt->rt_locks & RTV_MTU) &&
>> - (rt->rt_mtu > mtu || rt->rt_mtu == 0)) {
>> - struct ifnet *ifp;
>> -
>> - ifp = if_get(rt->rt_ifidx);
>> - if (ifp != NULL && mtu < ifp->if_mtu) {
>> - icmp6stat_inc(icp6s_pmtuchg);
>> - rt->rt_mtu = mtu;
>> + !(rt->rt_locks & RTV_MTU)) {
>> + u_int rtmtu;
>> +
>> + rtmtu = atomic_load_int(&rt->rt_mtu);
>> + if (rtmtu > mtu || rtmtu == 0) {
>> + struct ifnet *ifp;
>> +
>> + ifp = if_get(rt->rt_ifidx);
>> + if (ifp != NULL && mtu < ifp->if_mtu) {
>> + icmp6stat_inc(icp6s_pmtuchg);
>> + atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
>> + }
>> + if_put(ifp);
>> }
>> - if_put(ifp);
>> }
>> rtfree(rt);
>>
>> @@ -1848,7 +1852,7 @@ icmp6_mtudisc_timeout(struct rtentry *rt
>> rtdeletemsg(rt, ifp, rtableid);
>> } else {
>> if (!(rt->rt_locks & RTV_MTU))
>> - rt->rt_mtu = 0;
>> + atomic_store_int(&rt->rt_mtu, 0);
>> }
>>
>> if_put(ifp);
>> Index: netinet6/ip6_forward.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
>> diff -u -p -r1.124 ip6_forward.c
>> --- netinet6/ip6_forward.c 19 Jul 2024 16:58:32 -0000 1.124
>> +++ netinet6/ip6_forward.c 25 Dec 2024 19:02:20 -0000
>> @@ -399,8 +399,11 @@ senderr:
>> case EMSGSIZE:
>> type = ICMP6_PACKET_TOO_BIG;
>> if (rt != NULL) {
>> - if (rt->rt_mtu) {
>> - destmtu = rt->rt_mtu;
>> + u_int rtmtu;
>> +
>> + rtmtu = atomic_load_int(&rt->rt_mtu);
>> + if (rtmtu != 0) {
>> + destmtu = rtmtu;
>> } else {
>> struct ifnet *destifp;
>>
>> Index: netinet6/ip6_output.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
>> diff -u -p -r1.292 ip6_output.c
>> --- netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292
>> +++ netinet6/ip6_output.c 25 Dec 2024 19:16:03 -0000
>> @@ -156,8 +156,8 @@ struct idgen32_ctx ip6_id_ctx;
>> * The mbuf chain containing the packet will be freed.
>> * The mbuf opt, if present, will not be freed.
>> *
>> - * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int.
>> - * We use u_long to hold largest one, * which is rt_mtu.
>> + * type of "mtu": rt_mtu is u_int, ifnet.ifr_mtu is int.
>> + * We use u_long to hold largest one. XXX should be u_int
>> */
>> int
>> ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route *ro,
>> @@ -1027,11 +1027,11 @@ ip6_insertfraghdr(struct mbuf *m0, struc
>> int
>> ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup)
>> {
>> - u_int32_t mtu = 0;
>> + u_int mtu, rtmtu;
>> int error = 0;
>>
>> if (rt != NULL) {
>> - mtu = rt->rt_mtu;
>> + mtu = rtmtu = atomic_load_int(&rt->rt_mtu);
>> if (mtu == 0)
>> mtu = ifp->if_mtu;
>> else if (mtu < IPV6_MMTU) {
>> @@ -1048,7 +1048,7 @@ ip6_getpmtu(struct rtentry *rt, struct i
>> */
>> mtu = ifp->if_mtu;
>> if (!(rt->rt_locks & RTV_MTU))
>> - rt->rt_mtu = mtu;
>> + atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
>> }
>> } else {
>> mtu = ifp->if_mtu;
>> @@ -2812,7 +2812,7 @@ ip6_output_ipsec_pmtu_update(struct tdb
>> DPRINTF("spi %08x mtu %d rt %p cloned %d",
>> ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
>> if (rt != NULL) {
>> - rt->rt_mtu = tdb->tdb_mtu;
>> + atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
>> if (ro != NULL && ro->ro_rt != NULL) {
>> rtfree(ro->ro_rt);
>> ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
>> Index: sys/atomic.h
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/atomic.h,v
>> diff -u -p -r1.9 atomic.h
>> --- sys/atomic.h 21 Mar 2022 05:45:52 -0000 1.9
>> +++ sys/atomic.h 25 Dec 2024 17:38:09 -0000
>> @@ -202,13 +202,13 @@ atomic_sub_long_nv(volatile unsigned lon
>> */
>>
>> static inline unsigned int
>> -atomic_load_int(volatile unsigned int *p)
>> +atomic_load_int(volatile const unsigned int *p)
>> {
>> return *p;
>> }
>>
>> static inline unsigned long
>> -atomic_load_long(volatile unsigned long *p)
>> +atomic_load_long(volatile const unsigned long *p)
>> {
>> return *p;
>> }
>
route mtu atomic