Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: ip6 forward mcopy
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Mon, 24 Jun 2024 12:23:56 +0200

Download raw body.

Thread
On Mon, Jun 24, 2024 at 11:33:44AM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Forwarding IPv6 packets is slower than IPv4.  Reason is the m_copym()
> that is done for every packet.  Just in case we may have to send
> an ICMP6 packet, ip6_forward() creates a mbuf copy.  After that
> mbuf cluster is read only, so for the ethernet header another mbuf
> is allocated.  pf NAT and RDR ignores readonly clusters, so it also
> modifies the potential ICMP6 packet.
> 
> IPv4 ip_forward() avoids all these problems by copying the leading
> 68 bytes of the original packets onto the stack.  More is not need
> for ICMP.  IPv6 RFC requires up to 1232 bytes in the ICMP6 packet.
> This cannot be copied to the stack.
> 
> The reason for the difference is that the ICMP6 packet has to contain
> the full header chain.  I think if we have a simple UDP or TCP
> packet without chain, we could do a shortcut and just copy the
> header to ICMP6 packet.
> 
> Do we want to violate the RFC and gain 30% performance for the
> common TCP/UDP case?

Why only TCP/UDP? IMO this could be done for all L4 IP protocols.
Only if the packet has hop-to-hop headers or some other strange
intermediate header we need to copy all.

Do you know what other systems do here? Bouncing back 1232 bytes of
payload is just bonkers.

> bluhm
> 
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.119 ip6_forward.c
> --- netinet6/ip6_forward.c	20 Jun 2024 19:25:42 -0000	1.119
> +++ netinet6/ip6_forward.c	24 Jun 2024 08:16:04 -0000
> @@ -89,8 +89,15 @@ ip6_forward(struct mbuf *m, struct route
>  	struct rtentry *rt;
>  	struct sockaddr *dst;
>  	struct ifnet *ifp = NULL;
> -	int error = 0, type = 0, code = 0, destmtu = 0;
> +	u_int rtableid = m->m_pkthdr.ph_rtableid;
> +	u_int ifidx = m->m_pkthdr.ph_ifidx;
> +	u_int icmp_len;
> +	char icmp_buf[sizeof(struct ip6_hdr) + sizeof(struct tcphdr) +
> +	    MAX_TCPOPTLEN];
> +	CTASSERT(sizeof(icmp_buf) <= MHLEN);
> +	u_short mflags, pfflags;
>  	struct mbuf *mcopy;
> +	int error = 0, type = 0, code = 0, destmtu = 0;
>  #ifdef IPSEC
>  	struct tdb *tdb = NULL;
>  #endif /* IPSEC */
> @@ -117,9 +124,7 @@ ip6_forward(struct mbuf *m, struct route
>  			log(LOG_DEBUG,
>  			    "cannot forward "
>  			    "from %s to %s nxt %d received on interface %u\n",
> -			    src6, dst6,
> -			    ip6->ip6_nxt,
> -			    m->m_pkthdr.ph_ifidx);
> +			    src6, dst6, ip6->ip6_nxt, ifidx);
>  		}
>  		m_freem(m);
>  		goto done;
> @@ -138,11 +143,30 @@ ip6_forward(struct mbuf *m, struct route
>  	 * we need to generate an ICMP6 message to the src.
>  	 * Thanks to M_EXT, in most cases copy will not occur.
>  	 *
> +	 * For final protocol header like TCP or UDP, full header chain in
> +	 * ICMP6 packet is not necessary.  In this case only copy small
> +	 * part of original packet and save it on stack instead of mbuf.
> +	 * Although this violates RFC, it avoids additional mbuf allocations.
> +	 * Also pf nat and rdr do not affect the shared mbuf cluster.
> +	 *
>  	 * It is important to save it before IPsec processing as IPsec
>  	 * processing may modify the mbuf.
>  	 */
> -	mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN),
> -	    M_NOWAIT);
> +	switch (ip6->ip6_nxt) {
> +	case IPPROTO_TCP:
> +	case IPPROTO_UDP:
> +		icmp_len = min(m->m_pkthdr.len, sizeof(icmp_buf));
> +		mflags = m->m_flags;
> +		pfflags = m->m_pkthdr.pf.flags;
> +		m_copydata(m, 0, icmp_len, icmp_buf);
> +		mcopy = NULL;
> +		break;
> +	default:
> +		icmp_len = min(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN);
> +		mcopy = m_copym(m, 0, icmp_len, M_NOWAIT);
> +		icmp_len = 0;
> +		break;
> +	}
>  
>  #if NPF > 0
>  reroute:
> @@ -174,12 +198,10 @@ reroute:
>  	    m->m_pkthdr.ph_rtableid);
>  	if (rt == NULL) {
>  		ip6stat_inc(ip6s_noroute);
> -		if (mcopy != NULL) {
> -			icmp6_error(mcopy, ICMP6_DST_UNREACH,
> -				    ICMP6_DST_UNREACH_NOROUTE, 0);
> -		}
> +		type = ICMP6_DST_UNREACH;
> +		code = ICMP6_DST_UNREACH_NOROUTE;
>  		m_freem(m);
> -		goto done;
> +		goto icmperror;
>  	}
>  	dst = &ro->ro_dstsa;
>  
> @@ -190,7 +212,7 @@ reroute:
>  	 * unreachable error with Code 2 (beyond scope of source address).
>  	 * [draft-ietf-ipngwg-icmp-v3-00.txt, Section 3.1]
>  	 */
> -	if (in6_addr2scopeid(m->m_pkthdr.ph_ifidx, &ip6->ip6_src) !=
> +	if (in6_addr2scopeid(ifidx, &ip6->ip6_src) !=
>  	    in6_addr2scopeid(rt->rt_ifidx, &ip6->ip6_src)) {
>  		time_t uptime;
>  
> @@ -205,15 +227,12 @@ reroute:
>  			log(LOG_DEBUG,
>  			    "cannot forward "
>  			    "src %s, dst %s, nxt %d, rcvif %u, outif %u\n",
> -			    src6, dst6,
> -			    ip6->ip6_nxt,
> -			    m->m_pkthdr.ph_ifidx, rt->rt_ifidx);
> +			    src6, dst6, ip6->ip6_nxt, ifidx, rt->rt_ifidx);
>  		}
> -		if (mcopy != NULL)
> -			icmp6_error(mcopy, ICMP6_DST_UNREACH,
> -				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
> +		type = ICMP6_DST_UNREACH;
> +		code = ICMP6_DST_UNREACH_BEYONDSCOPE;
>  		m_freem(m);
> -		goto done;
> +		goto icmperror;
>  	}
>  
>  #ifdef IPSEC
> @@ -248,7 +267,7 @@ reroute:
>  		m_freem(m);
>  		goto freecopy;
>  	}
> -	if (rt->rt_ifidx == m->m_pkthdr.ph_ifidx &&
> +	if (rt->rt_ifidx == ifidx &&
>  	    ip6_sendredirects && !ISSET(flags, IPV6_REDIRECT) &&
>  	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
>  		if ((ifp->if_flags & IFF_POINTOPOINT) &&
> @@ -268,11 +287,10 @@ reroute:
>  			 * type/code is based on suggestion by Rich Draves.
>  			 * not sure if it is the best pick.
>  			 */
> -			if (mcopy != NULL)
> -				icmp6_error(mcopy, ICMP6_DST_UNREACH,
> -				    ICMP6_DST_UNREACH_ADDR, 0);
> +			type = ICMP6_DST_UNREACH;
> +			code = ICMP6_DST_UNREACH_ADDR;
>  			m_freem(m);
> -			goto done;
> +			goto icmperror;
>  		}
>  		type = ND_REDIRECT;
>  	}
> @@ -323,27 +341,40 @@ reroute:
>  	if (error || m == NULL)
>  		goto senderr;
>  
> -	if (mcopy != NULL)
> -		icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
> +	type = ICMP6_PACKET_TOO_BIG;
> +	destmtu = ifp->if_mtu;
>  	m_freem(m);
> -	goto done;
> +	goto icmperror;
>  
>  senderr:
> -	if (mcopy == NULL)
> +	if (mcopy == NULL && icmp_len == 0)
>  		goto done;
>  
>  	switch (error) {
>  	case 0:
>  		if (type == ND_REDIRECT) {
> -			icmp6_redirect_output(mcopy, rt);
> -			ip6stat_inc(ip6s_redirectsent);
> +			if (icmp_len != 0) {
> +				mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
> +				if (mcopy == NULL)
> +					goto done;
> +				mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
> +				mcopy->m_flags |= (mflags & M_COPYFLAGS);
> +				mcopy->m_pkthdr.ph_rtableid = rtableid;
> +				mcopy->m_pkthdr.ph_ifidx = ifidx;
> +				mcopy->m_pkthdr.pf.flags |=
> +				    (pfflags & PF_TAG_GENERATED);
> +				memcpy(mcopy->m_data, icmp_buf, icmp_len);
> +			}
> +			if (mcopy != NULL) {
> +				icmp6_redirect_output(mcopy, rt);
> +				ip6stat_inc(ip6s_redirectsent);
> +			}
>  			goto done;
>  		}
>  		goto freecopy;
>  
>  	case EMSGSIZE:
>  		type = ICMP6_PACKET_TOO_BIG;
> -		code = 0;
>  		if (rt != NULL) {
>  			if (rt->rt_mtu) {
>  				destmtu = rt->rt_mtu;
> @@ -381,7 +412,20 @@ senderr:
>  		code = ICMP6_DST_UNREACH_ADDR;
>  		break;
>  	}
> -	icmp6_error(mcopy, type, code, destmtu);
> + icmperror:
> +	if (icmp_len != 0) {
> +		mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
> +		if (mcopy == NULL)
> +			goto done;
> +		mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
> +		mcopy->m_flags |= (mflags & M_COPYFLAGS);
> +		mcopy->m_pkthdr.ph_rtableid = rtableid;
> +		mcopy->m_pkthdr.ph_ifidx = ifidx;
> +		mcopy->m_pkthdr.pf.flags |= (pfflags & PF_TAG_GENERATED);
> +		memcpy(mcopy->m_data, icmp_buf, icmp_len);
> +	}
> +	if (mcopy != NULL)
> +		icmp6_error(mcopy, type, code, destmtu);
>  	goto done;
>  
>   freecopy:
> 

-- 
:wq Claudio