Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: route cache mpath
To:
tech@openbsd.org
Date:
Tue, 27 Feb 2024 13:52:51 +0100

Download raw body.

Thread
  • Alexander Bluhm:

    route cache mpath

  • On Mon, Feb 26, 2024 at 11:44:57AM +0100, Alexander Bluhm wrote:
    > Also IP input should pass a struct route to IP forward.  This is
    > the same logic that is done when passing a route from IP forward
    > to IP output.  As a result the numbers of route cache lookups in
    > netstat -s should be correct now.
    > 
    > Finally I removed some inconsistencies between IPv4 and IPv6 and
    > IP forward and IP output.
    
    > Or should I split the diff into smaller pieces?
    
    This is the other half of the diff.
    
    ok?
    
    bluhm
    
    Index: netinet/ip_input.c
    ===================================================================
    RCS file: /cvs/src/sys/netinet/ip_input.c,v
    diff -u -p -r1.390 ip_input.c
    --- netinet/ip_input.c	22 Feb 2024 14:25:58 -0000	1.390
    +++ netinet/ip_input.c	27 Feb 2024 12:46:54 -0000
    @@ -138,7 +138,7 @@ extern struct niqueue		arpinq;
     
     int	ip_ours(struct mbuf **, int *, int, int);
     int	ip_dooptions(struct mbuf *, struct ifnet *);
    -int	in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
    +int	in_ouraddr(struct mbuf *, struct ifnet *, struct route *);
     
     int		ip_fragcheck(struct mbuf **, int *);
     struct mbuf *	ip_reass(struct ipqent *, struct ipq *);
    @@ -387,14 +387,18 @@ bad:
     int
     ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
     {
    -	struct mbuf	*m;
    -	struct rtentry	*rt = NULL;
    -	struct ip	*ip;
    +	struct route ro;
    +	struct mbuf *m;
    +	struct ip *ip;
     	int hlen;
    -	in_addr_t pfrdr = 0;
    +#if NPF > 0
    +	struct in_addr odst;
    +#endif
    +	int pfrdr = 0;
     
     	KASSERT(*offp == 0);
     
    +	ro.ro_rt = NULL;
     	ipstat_inc(ips_total);
     	m = *mp = ipv4_check(ifp, *mp);
     	if (m == NULL)
    @@ -412,7 +416,7 @@ ip_input_if(struct mbuf **mp, int *offp,
     	/*
     	 * Packet filter
     	 */
    -	pfrdr = ip->ip_dst.s_addr;
    +	odst = ip->ip_dst;
     	if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
     		goto bad;
     	m = *mp;
    @@ -420,7 +424,7 @@ ip_input_if(struct mbuf **mp, int *offp,
     		goto bad;
     
     	ip = mtod(m, struct ip *);
    -	pfrdr = (pfrdr != ip->ip_dst.s_addr);
    +	pfrdr = odst.s_addr != ip->ip_dst.s_addr;
     #endif
     
     	hlen = ip->ip_hl << 2;
    @@ -442,7 +446,7 @@ ip_input_if(struct mbuf **mp, int *offp,
     		goto out;
     	}
     
    -	switch(in_ouraddr(m, ifp, &rt)) {
    +	switch(in_ouraddr(m, ifp, &ro)) {
     	case 2:
     		goto bad;
     	case 1:
    @@ -544,14 +548,14 @@ ip_input_if(struct mbuf **mp, int *offp,
     	}
     #endif /* IPSEC */
     
    -	ip_forward(m, ifp, rt, pfrdr);
    +	ip_forward(m, ifp, &ro, pfrdr);
     	*mp = NULL;
     	return IPPROTO_DONE;
      bad:
     	nxt = IPPROTO_DONE;
     	m_freemp(mp);
      out:
    -	rtfree(rt);
    +	rtfree(ro.ro_rt);
     	return nxt;
     }
     
    @@ -748,11 +752,10 @@ ip_deliver(struct mbuf **mp, int *offp, 
     #undef IPSTAT_INC
     
     int
    -in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct rtentry **prt)
    +in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct route *ro)
     {
     	struct rtentry		*rt;
     	struct ip		*ip;
    -	struct sockaddr_in	 sin;
     	int			 match = 0;
     
     #if NPF > 0
    @@ -769,13 +772,8 @@ in_ouraddr(struct mbuf *m, struct ifnet 
     
     	ip = mtod(m, struct ip *);
     
    -	memset(&sin, 0, sizeof(sin));
    -	sin.sin_len = sizeof(sin);
    -	sin.sin_family = AF_INET;
    -	sin.sin_addr = ip->ip_dst;
    -	rt = rtalloc_mpath(sintosa(&sin), &ip->ip_src.s_addr,
    -	    m->m_pkthdr.ph_rtableid);
    -	if (rtisvalid(rt)) {
    +	rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
    +	if (rt != NULL) {
     		if (ISSET(rt->rt_flags, RTF_LOCAL))
     			match = 1;
     
    @@ -791,7 +789,6 @@ in_ouraddr(struct mbuf *m, struct ifnet 
     			m->m_flags |= M_BCAST;
     		}
     	}
    -	*prt = rt;
     
     	if (!match) {
     		struct ifaddr *ifa;
    @@ -1470,11 +1467,12 @@ const u_char inetctlerrmap[PRC_NCMDS] = 
      * via a source route.
      */
     void
    -ip_forward(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt, int srcrt)
    +ip_forward(struct mbuf *m, struct ifnet *ifp, struct route *ro, int srcrt)
     {
    -	struct mbuf mfake, *mcopy = NULL;
    +	struct mbuf mfake, *mcopy;
     	struct ip *ip = mtod(m, struct ip *);
    -	struct route ro;
    +	struct route iproute;
    +	struct rtentry *rt;
     	int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
     	u_int32_t dest;
     
    @@ -1482,26 +1480,23 @@ ip_forward(struct mbuf *m, struct ifnet 
     	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
     		ipstat_inc(ips_cantforward);
     		m_freem(m);
    -		goto freecopy;
    +		goto done;
     	}
     	if (ip->ip_ttl <= IPTTLDEC) {
     		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
    -		goto freecopy;
    +		goto done;
     	}
     
    -	ro.ro_rt = NULL;
    -	route_cache(&ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
    -	if (!rtisvalid(rt)) {
    -		rtfree(rt);
    -		rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr,
    -		    m->m_pkthdr.ph_rtableid);
    -		if (rt == NULL) {
    -			ipstat_inc(ips_noroute);
    -			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
    -			return;
    -		}
    +	if (ro == NULL) {
    +		ro = &iproute;
    +		ro->ro_rt = NULL;
    +	}
    +	rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
    +	if (rt == NULL) {
    +		ipstat_inc(ips_noroute);
    +		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
    +		goto done;
     	}
    -	ro.ro_rt = rt;
     
     	/*
     	 * Save at most 68 bytes of the packet in case
    @@ -1552,10 +1547,10 @@ ip_forward(struct mbuf *m, struct ifnet 
     		}
     	}
     
    -	error = ip_output(m, NULL, &ro,
    +	error = ip_output(m, NULL, ro,
     	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
     	    NULL, NULL, 0);
    -	rt = ro.ro_rt;
    +	rt = ro->ro_rt;
     	if (error)
     		ipstat_inc(ips_cantforward);
     	else {
    @@ -1563,10 +1558,10 @@ ip_forward(struct mbuf *m, struct ifnet 
     		if (type)
     			ipstat_inc(ips_redirectsent);
     		else
    -			goto freecopy;
    +			goto done;
     	}
     	if (!fake)
    -		goto freecopy;
    +		goto done;
     
     	switch (error) {
     	case 0:				/* forwarded, but need redirect */
    @@ -1590,7 +1585,7 @@ ip_forward(struct mbuf *m, struct ifnet 
     		}
     		ipstat_inc(ips_cantfrag);
     		if (destmtu == 0)
    -			goto freecopy;
    +			goto done;
     		break;
     
     	case EACCES:
    @@ -1598,7 +1593,7 @@ ip_forward(struct mbuf *m, struct ifnet 
     		 * pf(4) blocked the packet. There is no need to send an ICMP
     		 * packet back since pf(4) takes care of it.
     		 */
    -		goto freecopy;
    +		goto done;
     
     	case ENOBUFS:
     		/*
    @@ -1607,7 +1602,7 @@ ip_forward(struct mbuf *m, struct ifnet 
     		 * source quench could be a big problem under DoS attacks,
     		 * or the underlying interface is rate-limited.
     		 */
    -		goto freecopy;
    +		goto done;
     
     	case ENETUNREACH:		/* shouldn't happen, checked above */
     	case EHOSTUNREACH:
    @@ -1622,10 +1617,11 @@ ip_forward(struct mbuf *m, struct ifnet 
     	if (mcopy)
     		icmp_error(mcopy, type, code, dest, destmtu);
     
    -freecopy:
    +done:
    +	if (ro == &iproute && ro->ro_rt)
    +		rtfree(ro->ro_rt);
     	if (fake)
     		m_tag_delete_chain(&mfake);
    -	rtfree(rt);
     }
     
     int
    Index: netinet/ip_var.h
    ===================================================================
    RCS file: /cvs/src/sys/netinet/ip_var.h,v
    diff -u -p -r1.113 ip_var.h
    --- netinet/ip_var.h	13 Feb 2024 12:22:09 -0000	1.113
    +++ netinet/ip_var.h	27 Feb 2024 12:46:54 -0000
    @@ -255,7 +255,7 @@ void	 ip_savecontrol(struct inpcb *, str
     	    struct mbuf *);
     int	 ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
     int	 ip_deliver(struct mbuf **, int *, int, int);
    -void	 ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
    +void	 ip_forward(struct mbuf *, struct ifnet *, struct route *, int);
     int	 rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
     void	 rip_init(void);
     int	 rip_input(struct mbuf **, int *, int, int);
    Index: netinet6/ip6_forward.c
    ===================================================================
    RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
    diff -u -p -r1.115 ip6_forward.c
    --- netinet6/ip6_forward.c	22 Feb 2024 14:25:58 -0000	1.115
    +++ netinet6/ip6_forward.c	27 Feb 2024 12:46:55 -0000
    @@ -82,14 +82,15 @@
      */
     
     void
    -ip6_forward(struct mbuf *m, struct rtentry *rt, int srcrt)
    +ip6_forward(struct mbuf *m, struct route *ro, int srcrt)
     {
     	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
    +	struct route iproute;
    +	struct rtentry *rt;
     	struct sockaddr *dst;
    -	struct route ro;
     	struct ifnet *ifp = NULL;
     	int error = 0, type = 0, code = 0, destmtu = 0;
    -	struct mbuf *mcopy = NULL;
    +	struct mbuf *mcopy;
     #ifdef IPSEC
     	struct tdb *tdb = NULL;
     #endif /* IPSEC */
    @@ -121,13 +122,13 @@ ip6_forward(struct mbuf *m, struct rtent
     			    m->m_pkthdr.ph_ifidx);
     		}
     		m_freem(m);
    -		goto out;
    +		goto done;
     	}
     
     	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
     		icmp6_error(m, ICMP6_TIME_EXCEEDED,
     				ICMP6_TIME_EXCEED_TRANSIT, 0);
    -		goto out;
    +		goto done;
     	}
     	ip6->ip6_hlim -= IPV6_HLIMDEC;
     
    @@ -165,25 +166,22 @@ reroute:
     	}
     #endif /* IPSEC */
     
    -	ro.ro_rt = NULL;
    -	route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src,
    +	if (ro == NULL) {
    +		ro = &iproute;
    +		ro->ro_rt = NULL;
    +	}
    +	rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
     	    m->m_pkthdr.ph_rtableid);
    -	dst = &ro.ro_dstsa;
    -	if (!rtisvalid(rt)) {
    -		rtfree(rt);
    -		rt = rtalloc_mpath(dst, &ip6->ip6_src.s6_addr32[0],
    -		    m->m_pkthdr.ph_rtableid);
    -		if (rt == NULL) {
    -			ip6stat_inc(ip6s_noroute);
    -			if (mcopy) {
    -				icmp6_error(mcopy, ICMP6_DST_UNREACH,
    -					    ICMP6_DST_UNREACH_NOROUTE, 0);
    -			}
    -			m_freem(m);
    -			goto out;
    +	if (rt == NULL) {
    +		ip6stat_inc(ip6s_noroute);
    +		if (mcopy) {
    +			icmp6_error(mcopy, ICMP6_DST_UNREACH,
    +				    ICMP6_DST_UNREACH_NOROUTE, 0);
     		}
    +		m_freem(m);
    +		goto done;
     	}
    -	ro.ro_rt = rt;
    +	dst = &ro->ro_dstsa;
     
     	/*
     	 * Scope check: if a packet can't be delivered to its destination
    @@ -215,7 +213,7 @@ reroute:
     			icmp6_error(mcopy, ICMP6_DST_UNREACH,
     				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
     		m_freem(m);
    -		goto out;
    +		goto done;
     	}
     
     #ifdef IPSEC
    @@ -225,8 +223,8 @@ reroute:
     	 */
     	if (tdb != NULL) {
     		/* Callee frees mbuf */
    -		error = ip6_output_ipsec_send(tdb, m, &ro, 0, 1);
    -		rt = ro.ro_rt;
    +		error = ip6_output_ipsec_send(tdb, m, ro, 0, 1);
    +		rt = ro->ro_rt;
     		if (error)
     			goto senderr;
     		goto freecopy;
    @@ -254,7 +252,7 @@ reroute:
     	    ip6_sendredirects &&
     	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
     		if ((ifp->if_flags & IFF_POINTOPOINT) &&
    -		    nd6_is_addr_neighbor(&ro.ro_dstsin6, ifp)) {
    +		    nd6_is_addr_neighbor(&ro->ro_dstsin6, ifp)) {
     			/*
     			 * If the incoming interface is equal to the outgoing
     			 * one, the link attached to the interface is
    @@ -274,7 +272,7 @@ reroute:
     				icmp6_error(mcopy, ICMP6_DST_UNREACH,
     				    ICMP6_DST_UNREACH_ADDR, 0);
     			m_freem(m);
    -			goto out;
    +			goto done;
     		}
     		type = ND_REDIRECT;
     	}
    @@ -308,8 +306,7 @@ reroute:
     		/* tag as generated to skip over pf_test on rerun */
     		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
     		srcrt = 1;
    -		rtfree(rt);
    -		rt = NULL;
    +		ro = NULL;
     		if_put(ifp);
     		ifp = NULL;
     		goto reroute;
    @@ -324,21 +321,21 @@ reroute:
     	if (error || m == NULL)
     		goto senderr;
     
    -	if (mcopy != NULL)
    +	if (mcopy)
     		icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
     	m_freem(m);
    -	goto out;
    +	goto done;
     
     senderr:
     	if (mcopy == NULL)
    -		goto out;
    +		goto done;
     
     	switch (error) {
     	case 0:
     		if (type == ND_REDIRECT) {
     			icmp6_redirect_output(mcopy, rt);
     			ip6stat_inc(ip6s_redirectsent);
    -			goto out;
    +			goto done;
     		}
     		goto freecopy;
     
    @@ -383,12 +380,13 @@ senderr:
     		break;
     	}
     	icmp6_error(mcopy, type, code, destmtu);
    -	goto out;
    +	goto done;
     
     freecopy:
     	m_freem(mcopy);
    -out:
    -	rtfree(rt);
    +done:
    +	if (ro == &iproute && ro->ro_rt)
    +		rtfree(ro->ro_rt);
     	if_put(ifp);
     #ifdef IPSEC
     	tdb_unref(tdb);
    Index: netinet6/ip6_input.c
    ===================================================================
    RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
    diff -u -p -r1.258 ip6_input.c
    --- netinet6/ip6_input.c	22 Feb 2024 14:25:58 -0000	1.258
    +++ netinet6/ip6_input.c	27 Feb 2024 12:46:55 -0000
    @@ -357,21 +357,21 @@ bad:
     int
     ip6_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
     {
    +	struct route ro;
     	struct mbuf *m;
     	struct ip6_hdr *ip6;
    -	struct sockaddr_in6 sin6;
    -	struct rtentry *rt = NULL;
    +	struct rtentry *rt;
     	int ours = 0;
     	u_int16_t src_scope, dst_scope;
     #if NPF > 0
     	struct in6_addr odst;
     #endif
    -	int srcrt = 0;
    +	int pfrdr = 0;
     
     	KASSERT(*offp == 0);
     
    +	ro.ro_rt = NULL;
     	ip6stat_inc(ip6s_total);
    -
     	m = *mp = ipv6_check(ifp, *mp);
     	if (m == NULL)
     		goto bad;
    @@ -413,7 +413,7 @@ ip6_input_if(struct mbuf **mp, int *offp
     		goto bad;
     
     	ip6 = mtod(m, struct ip6_hdr *);
    -	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
    +	pfrdr = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
     #endif
     
     	/*
    @@ -517,18 +517,14 @@ ip6_input_if(struct mbuf **mp, int *offp
     	/*
     	 *  Unicast check
     	 */
    -	memset(&sin6, 0, sizeof(struct sockaddr_in6));
    -	sin6.sin6_len = sizeof(struct sockaddr_in6);
    -	sin6.sin6_family = AF_INET6;
    -	sin6.sin6_addr = ip6->ip6_dst;
    -	rt = rtalloc_mpath(sin6tosa(&sin6), &ip6->ip6_src.s6_addr32[0],
    +	rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src,
     	    m->m_pkthdr.ph_rtableid);
     
     	/*
     	 * Accept the packet if the route to the destination is marked
     	 * as local.
     	 */
    -	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL)) {
    +	if (rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) {
     		struct in6_ifaddr *ia6 = ifatoia6(rt->rt_ifa);
     
     		if (ip6_forwarding == 0 && rt->rt_ifidx != ifp->if_index &&
    @@ -618,14 +614,14 @@ ip6_input_if(struct mbuf **mp, int *offp
     	}
     #endif /* IPSEC */
     
    -	ip6_forward(m, rt, srcrt);
    +	ip6_forward(m, &ro, pfrdr);
     	*mp = NULL;
     	return IPPROTO_DONE;
      bad:
     	nxt = IPPROTO_DONE;
     	m_freemp(mp);
      out:
    -	rtfree(rt);
    +	rtfree(ro.ro_rt);
     	return nxt;
     }
     
    Index: netinet6/ip6_output.c
    ===================================================================
    RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
    diff -u -p -r1.287 ip6_output.c
    --- netinet6/ip6_output.c	22 Feb 2024 14:25:58 -0000	1.287
    +++ netinet6/ip6_output.c	27 Feb 2024 12:46:55 -0000
    @@ -391,7 +391,7 @@ reroute:
     	/* initialize cached route */
     	if (ro == NULL) {
     		ro = &iproute;
    -		bzero((caddr_t)ro, sizeof(*ro));
    +		ro->ro_rt = NULL;
     	}
     	ro_pmtu = ro;
     	if (opt && opt->ip6po_rthdr)
    @@ -748,7 +748,15 @@ reroute:
     	    (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt)))
     		goto done;
     	ip6stat_inc(ip6s_fragmented);
    +	goto done;
     
    +freehdrs:
    +	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
    +	m_freem(exthdrs.ip6e_dest1);
    +	m_freem(exthdrs.ip6e_rthdr);
    +	m_freem(exthdrs.ip6e_dest2);
    +bad:
    +	m_freem(m);
     done:
     	if (ro == &iproute && ro->ro_rt) {
     		rtfree(ro->ro_rt);
    @@ -760,16 +768,6 @@ done:
     	tdb_unref(tdb);
     #endif /* IPSEC */
     	return (error);
    -
    -freehdrs:
    -	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
    -	m_freem(exthdrs.ip6e_dest1);
    -	m_freem(exthdrs.ip6e_rthdr);
    -	m_freem(exthdrs.ip6e_dest2);
    -	/* FALLTHROUGH */
    -bad:
    -	m_freem(m);
    -	goto done;
     }
     
     int
    Index: netinet6/ip6_var.h
    ===================================================================
    RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
    diff -u -p -r1.114 ip6_var.h
    --- netinet6/ip6_var.h	14 Feb 2024 13:18:21 -0000	1.114
    +++ netinet6/ip6_var.h	27 Feb 2024 12:46:55 -0000
    @@ -320,7 +320,7 @@ int	ip6_process_hopopts(struct mbuf **, 
     void	ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
     int	ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
     
    -void	ip6_forward(struct mbuf *, struct rtentry *, int);
    +void	ip6_forward(struct mbuf *, struct route *, int);
     
     void	ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
     int	ip6_output(struct mbuf *, struct ip6_pktopts *, struct route *, int,
    
    
    
  • Alexander Bluhm:

    route cache mpath