Download raw body.
ip6 forward mcopy
On Mon, Jun 24, 2024 at 11:33:44AM +0200, Alexander Bluhm wrote:
> Hi,
>
> Forwarding IPv6 packets is slower than IPv4. Reason is the m_copym()
> that is done for every packet. Just in case we may have to send
> an ICMP6 packet, ip6_forward() creates a mbuf copy. After that
> mbuf cluster is read only, so for the ethernet header another mbuf
> is allocated. pf NAT and RDR ignores readonly clusters, so it also
> modifies the potential ICMP6 packet.
>
> IPv4 ip_forward() avoids all these problems by copying the leading
> 68 bytes of the original packets onto the stack. More is not need
> for ICMP. IPv6 RFC requires up to 1232 bytes in the ICMP6 packet.
> This cannot be copied to the stack.
>
> The reason for the difference is that the ICMP6 packet has to contain
> the full header chain. I think if we have a simple UDP or TCP
> packet without chain, we could do a shortcut and just copy the
> header to ICMP6 packet.
>
> Do we want to violate the RFC and gain 30% performance for the
> common TCP/UDP case?
Why only TCP/UDP? IMO this could be done for all L4 IP protocols.
Only if the packet has hop-to-hop headers or some other strange
intermediate header we need to copy all.
Do you know what other systems do here? Bouncing back 1232 bytes of
payload is just bonkers.
> bluhm
>
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.119 ip6_forward.c
> --- netinet6/ip6_forward.c 20 Jun 2024 19:25:42 -0000 1.119
> +++ netinet6/ip6_forward.c 24 Jun 2024 08:16:04 -0000
> @@ -89,8 +89,15 @@ ip6_forward(struct mbuf *m, struct route
> struct rtentry *rt;
> struct sockaddr *dst;
> struct ifnet *ifp = NULL;
> - int error = 0, type = 0, code = 0, destmtu = 0;
> + u_int rtableid = m->m_pkthdr.ph_rtableid;
> + u_int ifidx = m->m_pkthdr.ph_ifidx;
> + u_int icmp_len;
> + char icmp_buf[sizeof(struct ip6_hdr) + sizeof(struct tcphdr) +
> + MAX_TCPOPTLEN];
> + CTASSERT(sizeof(icmp_buf) <= MHLEN);
> + u_short mflags, pfflags;
> struct mbuf *mcopy;
> + int error = 0, type = 0, code = 0, destmtu = 0;
> #ifdef IPSEC
> struct tdb *tdb = NULL;
> #endif /* IPSEC */
> @@ -117,9 +124,7 @@ ip6_forward(struct mbuf *m, struct route
> log(LOG_DEBUG,
> "cannot forward "
> "from %s to %s nxt %d received on interface %u\n",
> - src6, dst6,
> - ip6->ip6_nxt,
> - m->m_pkthdr.ph_ifidx);
> + src6, dst6, ip6->ip6_nxt, ifidx);
> }
> m_freem(m);
> goto done;
> @@ -138,11 +143,30 @@ ip6_forward(struct mbuf *m, struct route
> * we need to generate an ICMP6 message to the src.
> * Thanks to M_EXT, in most cases copy will not occur.
> *
> + * For final protocol header like TCP or UDP, full header chain in
> + * ICMP6 packet is not necessary. In this case only copy small
> + * part of original packet and save it on stack instead of mbuf.
> + * Although this violates RFC, it avoids additional mbuf allocations.
> + * Also pf nat and rdr do not affect the shared mbuf cluster.
> + *
> * It is important to save it before IPsec processing as IPsec
> * processing may modify the mbuf.
> */
> - mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN),
> - M_NOWAIT);
> + switch (ip6->ip6_nxt) {
> + case IPPROTO_TCP:
> + case IPPROTO_UDP:
> + icmp_len = min(m->m_pkthdr.len, sizeof(icmp_buf));
> + mflags = m->m_flags;
> + pfflags = m->m_pkthdr.pf.flags;
> + m_copydata(m, 0, icmp_len, icmp_buf);
> + mcopy = NULL;
> + break;
> + default:
> + icmp_len = min(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN);
> + mcopy = m_copym(m, 0, icmp_len, M_NOWAIT);
> + icmp_len = 0;
> + break;
> + }
>
> #if NPF > 0
> reroute:
> @@ -174,12 +198,10 @@ reroute:
> m->m_pkthdr.ph_rtableid);
> if (rt == NULL) {
> ip6stat_inc(ip6s_noroute);
> - if (mcopy != NULL) {
> - icmp6_error(mcopy, ICMP6_DST_UNREACH,
> - ICMP6_DST_UNREACH_NOROUTE, 0);
> - }
> + type = ICMP6_DST_UNREACH;
> + code = ICMP6_DST_UNREACH_NOROUTE;
> m_freem(m);
> - goto done;
> + goto icmperror;
> }
> dst = &ro->ro_dstsa;
>
> @@ -190,7 +212,7 @@ reroute:
> * unreachable error with Code 2 (beyond scope of source address).
> * [draft-ietf-ipngwg-icmp-v3-00.txt, Section 3.1]
> */
> - if (in6_addr2scopeid(m->m_pkthdr.ph_ifidx, &ip6->ip6_src) !=
> + if (in6_addr2scopeid(ifidx, &ip6->ip6_src) !=
> in6_addr2scopeid(rt->rt_ifidx, &ip6->ip6_src)) {
> time_t uptime;
>
> @@ -205,15 +227,12 @@ reroute:
> log(LOG_DEBUG,
> "cannot forward "
> "src %s, dst %s, nxt %d, rcvif %u, outif %u\n",
> - src6, dst6,
> - ip6->ip6_nxt,
> - m->m_pkthdr.ph_ifidx, rt->rt_ifidx);
> + src6, dst6, ip6->ip6_nxt, ifidx, rt->rt_ifidx);
> }
> - if (mcopy != NULL)
> - icmp6_error(mcopy, ICMP6_DST_UNREACH,
> - ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
> + type = ICMP6_DST_UNREACH;
> + code = ICMP6_DST_UNREACH_BEYONDSCOPE;
> m_freem(m);
> - goto done;
> + goto icmperror;
> }
>
> #ifdef IPSEC
> @@ -248,7 +267,7 @@ reroute:
> m_freem(m);
> goto freecopy;
> }
> - if (rt->rt_ifidx == m->m_pkthdr.ph_ifidx &&
> + if (rt->rt_ifidx == ifidx &&
> ip6_sendredirects && !ISSET(flags, IPV6_REDIRECT) &&
> (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
> if ((ifp->if_flags & IFF_POINTOPOINT) &&
> @@ -268,11 +287,10 @@ reroute:
> * type/code is based on suggestion by Rich Draves.
> * not sure if it is the best pick.
> */
> - if (mcopy != NULL)
> - icmp6_error(mcopy, ICMP6_DST_UNREACH,
> - ICMP6_DST_UNREACH_ADDR, 0);
> + type = ICMP6_DST_UNREACH;
> + code = ICMP6_DST_UNREACH_ADDR;
> m_freem(m);
> - goto done;
> + goto icmperror;
> }
> type = ND_REDIRECT;
> }
> @@ -323,27 +341,40 @@ reroute:
> if (error || m == NULL)
> goto senderr;
>
> - if (mcopy != NULL)
> - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
> + type = ICMP6_PACKET_TOO_BIG;
> + destmtu = ifp->if_mtu;
> m_freem(m);
> - goto done;
> + goto icmperror;
>
> senderr:
> - if (mcopy == NULL)
> + if (mcopy == NULL && icmp_len == 0)
> goto done;
>
> switch (error) {
> case 0:
> if (type == ND_REDIRECT) {
> - icmp6_redirect_output(mcopy, rt);
> - ip6stat_inc(ip6s_redirectsent);
> + if (icmp_len != 0) {
> + mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
> + if (mcopy == NULL)
> + goto done;
> + mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
> + mcopy->m_flags |= (mflags & M_COPYFLAGS);
> + mcopy->m_pkthdr.ph_rtableid = rtableid;
> + mcopy->m_pkthdr.ph_ifidx = ifidx;
> + mcopy->m_pkthdr.pf.flags |=
> + (pfflags & PF_TAG_GENERATED);
> + memcpy(mcopy->m_data, icmp_buf, icmp_len);
> + }
> + if (mcopy != NULL) {
> + icmp6_redirect_output(mcopy, rt);
> + ip6stat_inc(ip6s_redirectsent);
> + }
> goto done;
> }
> goto freecopy;
>
> case EMSGSIZE:
> type = ICMP6_PACKET_TOO_BIG;
> - code = 0;
> if (rt != NULL) {
> if (rt->rt_mtu) {
> destmtu = rt->rt_mtu;
> @@ -381,7 +412,20 @@ senderr:
> code = ICMP6_DST_UNREACH_ADDR;
> break;
> }
> - icmp6_error(mcopy, type, code, destmtu);
> + icmperror:
> + if (icmp_len != 0) {
> + mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
> + if (mcopy == NULL)
> + goto done;
> + mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
> + mcopy->m_flags |= (mflags & M_COPYFLAGS);
> + mcopy->m_pkthdr.ph_rtableid = rtableid;
> + mcopy->m_pkthdr.ph_ifidx = ifidx;
> + mcopy->m_pkthdr.pf.flags |= (pfflags & PF_TAG_GENERATED);
> + memcpy(mcopy->m_data, icmp_buf, icmp_len);
> + }
> + if (mcopy != NULL)
> + icmp6_error(mcopy, type, code, destmtu);
> goto done;
>
> freecopy:
>
--
:wq Claudio
ip6 forward mcopy