Download raw body.
route mtu atomic
Hi,
With TCP unlocking we run into the problem that path MTU discovery
happens in parallel. To keep route mtu consistent, I want to make
access to rt_mtu atomic.
The compare-and-swap function is used to detect if another thread
is modifying the mtu. In this case skip updating rt_mtu.
ok?
bluhm
Index: net/if_loop.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
diff -u -p -r1.98 if_loop.c
--- net/if_loop.c 29 Dec 2023 11:43:04 -0000 1.98
+++ net/if_loop.c 25 Dec 2024 17:22:50 -0000
@@ -277,8 +277,8 @@ looutput(struct ifnet *ifp, struct mbuf
void
lortrequest(struct ifnet *ifp, int cmd, struct rtentry *rt)
{
- if (rt && rt->rt_mtu == 0)
- rt->rt_mtu = LOMTU;
+ if (rt != NULL)
+ atomic_cas_uint(&rt->rt_mtu, 0, LOMTU);
}
/*
Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.437 route.c
--- net/route.c 20 Sep 2024 02:00:46 -0000 1.437
+++ net/route.c 25 Dec 2024 17:35:50 -0000
@@ -557,8 +557,14 @@ rt_setgwroute(struct rtentry *rt, const
* If the MTU of next hop is 0, this will reset the MTU of the
* route to run PMTUD again from scratch.
*/
- if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
- rt->rt_mtu = nhrt->rt_mtu;
+ if (!ISSET(rt->rt_locks, RTV_MTU)) {
+ u_int mtu, nhmtu;
+
+ mtu = atomic_load_int(&rt->rt_mtu);
+ nhmtu = atomic_load_int(&nhrt->rt_mtu);
+ if (mtu > nhmtu)
+ atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
+ }
/*
* To avoid reference counting problems when writing link-layer
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.210 route.h
--- net/route.h 31 Mar 2024 15:53:12 -0000 1.210
+++ net/route.h 25 Dec 2024 17:24:16 -0000
@@ -61,7 +61,7 @@ struct rt_kmetrics {
u_int64_t rmx_pksent; /* packets sent using this route */
int64_t rmx_expire; /* lifetime for route, e.g. redirect */
u_int rmx_locks; /* Kernel must leave these values */
- u_int rmx_mtu; /* MTU for this path */
+ u_int rmx_mtu; /* [a] MTU for this path */
};
#endif
Index: net/rtsock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
diff -u -p -r1.375 rtsock.c
--- net/rtsock.c 12 Jul 2024 17:20:18 -0000 1.375
+++ net/rtsock.c 25 Dec 2024 17:39:32 -0000
@@ -1345,7 +1345,7 @@ route_cleargateway(struct rtentry *rt, v
if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
!ISSET(rt->rt_locks, RTV_MTU))
- rt->rt_mtu = 0;
+ atomic_store_int(&rt->rt_mtu, 0);
return (0);
}
@@ -1393,7 +1393,7 @@ rtm_setmetrics(u_long which, const struc
int64_t expire;
if (which & RTV_MTU)
- out->rmx_mtu = in->rmx_mtu;
+ atomic_store_int(&out->rmx_mtu, in->rmx_mtu);
if (which & RTV_EXPIRE) {
expire = in->rmx_expire;
if (expire != 0) {
@@ -1421,7 +1421,7 @@ rtm_getmetrics(const struct rtentry *rt,
bzero(out, sizeof(*out));
out->rmx_locks = in->rmx_locks;
- out->rmx_mtu = in->rmx_mtu;
+ out->rmx_mtu = atomic_load_int(&in->rmx_mtu);
out->rmx_expire = expire;
out->rmx_pksent = in->rmx_pksent;
}
Index: netinet/ip_icmp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
diff -u -p -r1.197 ip_icmp.c
--- netinet/ip_icmp.c 4 Dec 2024 22:24:11 -0000 1.197
+++ netinet/ip_icmp.c 25 Dec 2024 17:46:46 -0000
@@ -1008,6 +1008,7 @@ icmp_mtudisc(struct icmp *icp, u_int rta
{
struct rtentry *rt;
struct ifnet *ifp;
+ u_int rtmtu;
u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
@@ -1020,17 +1021,18 @@ icmp_mtudisc(struct icmp *icp, u_int rta
return;
}
+ rtmtu = atomic_load_int(&rt->rt_mtu);
if (mtu == 0) {
int i = 0;
mtu = ntohs(icp->icmp_ip.ip_len);
/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
- if (mtu > rt->rt_mtu && rt->rt_mtu != 0)
+ if (mtu > rtmtu && rtmtu != 0)
mtu -= (icp->icmp_ip.ip_hl << 2);
/* If we still can't guess a value, try the route */
if (mtu == 0) {
- mtu = rt->rt_mtu;
+ mtu = rtmtu;
/* If no route mtu, default to the interface mtu */
@@ -1055,8 +1057,8 @@ icmp_mtudisc(struct icmp *icp, u_int rta
if ((rt->rt_locks & RTV_MTU) == 0) {
if (mtu < 296 || mtu > ifp->if_mtu)
rt->rt_locks |= RTV_MTU;
- else if (rt->rt_mtu > mtu || rt->rt_mtu == 0)
- rt->rt_mtu = mtu;
+ else if (rtmtu > mtu || rtmtu == 0)
+ atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
}
if_put(ifp);
@@ -1089,7 +1091,7 @@ icmp_mtudisc_timeout(struct rtentry *rt,
rtableid, NULL);
} else {
if ((rt->rt_locks & RTV_MTU) == 0)
- rt->rt_mtu = 0;
+ atomic_store_int(&rt->rt_mtu, 0);
}
if_put(ifp);
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.402 ip_input.c
--- netinet/ip_input.c 21 Nov 2024 20:15:44 -0000 1.402
+++ netinet/ip_input.c 25 Dec 2024 17:49:03 -0000
@@ -1650,8 +1650,11 @@ ip_forward(struct mbuf *m, struct ifnet
type = ICMP_UNREACH;
code = ICMP_UNREACH_NEEDFRAG;
if (rt != NULL) {
- if (rt->rt_mtu) {
- destmtu = rt->rt_mtu;
+ u_int rtmtu;
+
+ rtmtu = atomic_load_int(&rt->rt_mtu);
+ if (rtmtu != 0) {
+ destmtu = rtmtu;
} else {
struct ifnet *destifp;
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.401 ip_output.c
--- netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401
+++ netinet/ip_output.c 25 Dec 2024 18:55:12 -0000
@@ -211,7 +211,8 @@ reroute:
error = EHOSTUNREACH;
goto bad;
}
- if ((mtu = ro->ro_rt->rt_mtu) == 0)
+ mtu = atomic_load_int(&ro->ro_rt->rt_mtu);
+ if (mtu == 0)
mtu = ifp->if_mtu;
if (ro->ro_rt->rt_flags & RTF_GATEWAY)
@@ -470,9 +471,14 @@ sendit:
*/
if (rtisvalid(ro->ro_rt) &&
ISSET(ro->ro_rt->rt_flags, RTF_HOST) &&
- !(ro->ro_rt->rt_locks & RTV_MTU) &&
- (ro->ro_rt->rt_mtu > ifp->if_mtu)) {
- ro->ro_rt->rt_mtu = ifp->if_mtu;
+ !(ro->ro_rt->rt_locks & RTV_MTU)) {
+ u_int rtmtu;
+
+ rtmtu = atomic_load_int(&ro->ro_rt->rt_mtu);
+ if (rtmtu > ifp->if_mtu) {
+ atomic_cas_uint(&ro->ro_rt->rt_mtu, rtmtu,
+ ifp->if_mtu);
+ }
}
ipstat_inc(ips_cantfrag);
goto bad;
@@ -558,7 +564,7 @@ ip_output_ipsec_pmtu_update(struct tdb *
DPRINTF("spi %08x mtu %d rt %p cloned %d",
ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
if (rt != NULL) {
- rt->rt_mtu = tdb->tdb_mtu;
+ atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
if (ro != NULL && ro->ro_rt != NULL) {
rtfree(ro->ro_rt);
ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.412 tcp_input.c
--- netinet/tcp_input.c 24 Dec 2024 19:16:53 -0000 1.412
+++ netinet/tcp_input.c 25 Dec 2024 18:56:37 -0000
@@ -2835,7 +2835,7 @@ tcp_mss(struct tcpcb *tp, int offer)
* if there's an mtu associated with the route and we support
* path MTU discovery for the underlying protocol family, use it.
*/
- rtmtu = READ_ONCE(rt->rt_mtu);
+ rtmtu = atomic_load_int(&rt->rt_mtu);
if (rtmtu) {
/*
* One may wish to lower MSS to take into account options,
Index: netinet6/icmp6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v
diff -u -p -r1.255 icmp6.c
--- netinet6/icmp6.c 12 Aug 2024 11:25:27 -0000 1.255
+++ netinet6/icmp6.c 25 Dec 2024 19:00:50 -0000
@@ -1016,16 +1016,20 @@ icmp6_mtudisc_update(struct ip6ctlparam
rt = icmp6_mtudisc_clone(&sin6, m->m_pkthdr.ph_rtableid, 0);
if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
- !(rt->rt_locks & RTV_MTU) &&
- (rt->rt_mtu > mtu || rt->rt_mtu == 0)) {
- struct ifnet *ifp;
-
- ifp = if_get(rt->rt_ifidx);
- if (ifp != NULL && mtu < ifp->if_mtu) {
- icmp6stat_inc(icp6s_pmtuchg);
- rt->rt_mtu = mtu;
+ !(rt->rt_locks & RTV_MTU)) {
+ u_int rtmtu;
+
+ rtmtu = atomic_load_int(&rt->rt_mtu);
+ if (rtmtu > mtu || rtmtu == 0) {
+ struct ifnet *ifp;
+
+ ifp = if_get(rt->rt_ifidx);
+ if (ifp != NULL && mtu < ifp->if_mtu) {
+ icmp6stat_inc(icp6s_pmtuchg);
+ atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
+ }
+ if_put(ifp);
}
- if_put(ifp);
}
rtfree(rt);
@@ -1848,7 +1852,7 @@ icmp6_mtudisc_timeout(struct rtentry *rt
rtdeletemsg(rt, ifp, rtableid);
} else {
if (!(rt->rt_locks & RTV_MTU))
- rt->rt_mtu = 0;
+ atomic_store_int(&rt->rt_mtu, 0);
}
if_put(ifp);
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.124 ip6_forward.c
--- netinet6/ip6_forward.c 19 Jul 2024 16:58:32 -0000 1.124
+++ netinet6/ip6_forward.c 25 Dec 2024 19:02:20 -0000
@@ -399,8 +399,11 @@ senderr:
case EMSGSIZE:
type = ICMP6_PACKET_TOO_BIG;
if (rt != NULL) {
- if (rt->rt_mtu) {
- destmtu = rt->rt_mtu;
+ u_int rtmtu;
+
+ rtmtu = atomic_load_int(&rt->rt_mtu);
+ if (rtmtu != 0) {
+ destmtu = rtmtu;
} else {
struct ifnet *destifp;
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.292 ip6_output.c
--- netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292
+++ netinet6/ip6_output.c 25 Dec 2024 19:16:03 -0000
@@ -156,8 +156,8 @@ struct idgen32_ctx ip6_id_ctx;
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
*
- * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int.
- * We use u_long to hold largest one, * which is rt_mtu.
+ * type of "mtu": rt_mtu is u_int, ifnet.ifr_mtu is int.
+ * We use u_long to hold largest one. XXX should be u_int
*/
int
ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route *ro,
@@ -1027,11 +1027,11 @@ ip6_insertfraghdr(struct mbuf *m0, struc
int
ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup)
{
- u_int32_t mtu = 0;
+ u_int mtu, rtmtu;
int error = 0;
if (rt != NULL) {
- mtu = rt->rt_mtu;
+ mtu = rtmtu = atomic_load_int(&rt->rt_mtu);
if (mtu == 0)
mtu = ifp->if_mtu;
else if (mtu < IPV6_MMTU) {
@@ -1048,7 +1048,7 @@ ip6_getpmtu(struct rtentry *rt, struct i
*/
mtu = ifp->if_mtu;
if (!(rt->rt_locks & RTV_MTU))
- rt->rt_mtu = mtu;
+ atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu);
}
} else {
mtu = ifp->if_mtu;
@@ -2812,7 +2812,7 @@ ip6_output_ipsec_pmtu_update(struct tdb
DPRINTF("spi %08x mtu %d rt %p cloned %d",
ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
if (rt != NULL) {
- rt->rt_mtu = tdb->tdb_mtu;
+ atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu);
if (ro != NULL && ro->ro_rt != NULL) {
rtfree(ro->ro_rt);
ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE,
Index: sys/atomic.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/atomic.h,v
diff -u -p -r1.9 atomic.h
--- sys/atomic.h 21 Mar 2022 05:45:52 -0000 1.9
+++ sys/atomic.h 25 Dec 2024 17:38:09 -0000
@@ -202,13 +202,13 @@ atomic_sub_long_nv(volatile unsigned lon
*/
static inline unsigned int
-atomic_load_int(volatile unsigned int *p)
+atomic_load_int(volatile const unsigned int *p)
{
return *p;
}
static inline unsigned long
-atomic_load_long(volatile unsigned long *p)
+atomic_load_long(volatile const unsigned long *p)
{
return *p;
}
route mtu atomic