Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: ip forward mfake mbuf copy
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Mon, 24 Jun 2024 12:11:50 +0200

Download raw body.

Thread
On Mon, Jun 24, 2024 at 11:36:07AM +0200, Alexander Bluhm wrote:
> On Fri, Jun 21, 2024 at 06:42:13PM +0200, Alexander Bluhm wrote:
> > Currently ip_forward() allocates a fake mbuf copy on the stack to
> > send an ICMP packet after ip_output() has failed.  It seems easier
> > to just copy the data onto the stack that icmp_error() may use.
> > Only if the ICMP is acutally sent, create the mbuf.
> > 
> > m_dup_pkthdr() uses atomic operation to link the incpb to mbuf.
> > pf_pkt_addr_changed() is immedately called afterwards to remove the
> > linkage again.  Also m_tag_delete_chain() is overhead.
> 
> Updated diff.  Moves a few lines around to be consistent with my
> ip6_forward() diff.
> 
> anyone?

The only bit that worries me is that we now need to copy mbuf header
fields explicitly in this code path. Now icmp_do_error() does the same and
you provide all the field that code needs. So this is fine.
Now looking at struct pkthdr I think there is one field that we may want
to include here as well: ph_loopcnt.
At least that way we could detect packet loops going through icmp_error()
in case someone ever manages to build such a case.

In my opinon both ways of handling this are equally ugly. Your version
uses a little bit less implicit magic and stack. So OK claudio@
 
> bluhm
> 
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.395 ip_input.c
> --- netinet/ip_input.c	7 Jun 2024 18:24:16 -0000	1.395
> +++ netinet/ip_input.c	24 Jun 2024 08:10:06 -0000
> @@ -1532,11 +1532,16 @@ const u_char inetctlerrmap[PRC_NCMDS] = 
>  void
>  ip_forward(struct mbuf *m, struct ifnet *ifp, struct route *ro, int flags)
>  {
> -	struct mbuf mfake, *mcopy;
>  	struct ip *ip = mtod(m, struct ip *);
>  	struct route iproute;
>  	struct rtentry *rt;
> -	int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
> +	u_int rtableid = m->m_pkthdr.ph_rtableid;
> +	u_int icmp_len;
> +	char icmp_buf[68];
> +	CTASSERT(sizeof(icmp_buf) <= MHLEN);
> +	u_short mflags, pfflags;
> +	struct mbuf *mcopy;
> +	int error = 0, type = 0, code = 0, destmtu = 0;
>  	u_int32_t dest;
>  
>  	dest = 0;
> @@ -1554,7 +1559,7 @@ ip_forward(struct mbuf *m, struct ifnet 
>  		ro = &iproute;
>  		ro->ro_rt = NULL;
>  	}
> -	rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
> +	rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, rtableid);
>  	if (rt == NULL) {
>  		ipstat_inc(ips_noroute);
>  		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
> @@ -1562,24 +1567,14 @@ ip_forward(struct mbuf *m, struct ifnet 
>  	}
>  
>  	/*
> -	 * Save at most 68 bytes of the packet in case
> -	 * we need to generate an ICMP message to the src.
> -	 * The data is saved in the mbuf on the stack that
> -	 * acts as a temporary storage not intended to be
> -	 * passed down the IP stack or to the mfree.
> +	 * Save at most 68 bytes of the packet in case we need to generate
> +	 * an ICMP message to the src.  The data is saved on the stack.
> +	 * A new mbuf is only allocated when ICMP is actually created.
>  	 */
> -	memset(&mfake.m_hdr, 0, sizeof(mfake.m_hdr));
> -	mfake.m_type = m->m_type;
> -	if (m_dup_pkthdr(&mfake, m, M_DONTWAIT) == 0) {
> -		mfake.m_data = mfake.m_pktdat;
> -		len = min(ntohs(ip->ip_len), 68);
> -		m_copydata(m, 0, len, mfake.m_pktdat);
> -		mfake.m_pkthdr.len = mfake.m_len = len;
> -#if NPF > 0
> -		pf_pkt_addr_changed(&mfake);
> -#endif	/* NPF > 0 */
> -		fake = 1;
> -	}
> +	icmp_len = min(sizeof(icmp_buf), ntohs(ip->ip_len));
> +	mflags = m->m_flags;
> +	pfflags = m->m_pkthdr.pf.flags;
> +	m_copydata(m, 0, icmp_len, icmp_buf);
>  
>  	ip->ip_ttl -= IPTTLDEC;
>  
> @@ -1597,7 +1592,7 @@ ip_forward(struct mbuf *m, struct ifnet 
>  	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
>  	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
>  	    ip_sendredirects && !ISSET(flags, IP_REDIRECT) &&
> -	    !arpproxy(satosin(rt_key(rt))->sin_addr, m->m_pkthdr.ph_rtableid)) {
> +	    !arpproxy(satosin(rt_key(rt))->sin_addr, rtableid)) {
>  		if ((ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_netmask) ==
>  		    ifatoia(rt->rt_ifa)->ia_net) {
>  		    if (rt->rt_flags & RTF_GATEWAY)
> @@ -1621,9 +1616,6 @@ ip_forward(struct mbuf *m, struct ifnet 
>  		else
>  			goto done;
>  	}
> -	if (!fake)
> -		goto done;
> -
>  	switch (error) {
>  	case 0:				/* forwarded, but need redirect */
>  		/* type, code set above */
> @@ -1674,15 +1666,21 @@ ip_forward(struct mbuf *m, struct ifnet 
>  		code = ICMP_UNREACH_HOST;
>  		break;
>  	}
> -	mcopy = m_copym(&mfake, 0, len, M_DONTWAIT);
> -	if (mcopy != NULL)
> -		icmp_error(mcopy, type, code, dest, destmtu);
> +
> +	mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
> +	if (mcopy == NULL)
> +		goto done;
> +	mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
> +	mcopy->m_flags |= (mflags & M_COPYFLAGS);
> +	mcopy->m_pkthdr.ph_rtableid = rtableid;
> +	mcopy->m_pkthdr.ph_ifidx = ifp->if_index;
> +	mcopy->m_pkthdr.pf.flags |= (pfflags & PF_TAG_GENERATED);
> +	memcpy(mcopy->m_data, icmp_buf, icmp_len);
> +	icmp_error(mcopy, type, code, dest, destmtu);
>  
>   done:
>  	if (ro == &iproute)
>  		rtfree(ro->ro_rt);
> -	if (fake)
> -		m_tag_delete_chain(&mfake);
>  }
>  
>  int
> 

-- 
:wq Claudio