Download raw body.
ip6 forward mcopy
Hi,
Forwarding IPv6 packets is slower than IPv4. Reason is the m_copym()
that is done for every packet. Just in case we may have to send
an ICMP6 packet, ip6_forward() creates a mbuf copy. After that
mbuf cluster is read only, so for the ethernet header another mbuf
is allocated. pf NAT and RDR ignores readonly clusters, so it also
modifies the potential ICMP6 packet.
IPv4 ip_forward() avoids all these problems by copying the leading
68 bytes of the original packets onto the stack. More is not need
for ICMP. IPv6 RFC requires up to 1232 bytes in the ICMP6 packet.
This cannot be copied to the stack.
The reason for the difference is that the ICMP6 packet has to contain
the full header chain. I think if we have a simple UDP or TCP
packet without chain, we could do a shortcut and just copy the
header to ICMP6 packet.
Do we want to violate the RFC and gain 30% performance for the
common TCP/UDP case?
bluhm
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.119 ip6_forward.c
--- netinet6/ip6_forward.c 20 Jun 2024 19:25:42 -0000 1.119
+++ netinet6/ip6_forward.c 24 Jun 2024 08:16:04 -0000
@@ -89,8 +89,15 @@ ip6_forward(struct mbuf *m, struct route
struct rtentry *rt;
struct sockaddr *dst;
struct ifnet *ifp = NULL;
- int error = 0, type = 0, code = 0, destmtu = 0;
+ u_int rtableid = m->m_pkthdr.ph_rtableid;
+ u_int ifidx = m->m_pkthdr.ph_ifidx;
+ u_int icmp_len;
+ char icmp_buf[sizeof(struct ip6_hdr) + sizeof(struct tcphdr) +
+ MAX_TCPOPTLEN];
+ CTASSERT(sizeof(icmp_buf) <= MHLEN);
+ u_short mflags, pfflags;
struct mbuf *mcopy;
+ int error = 0, type = 0, code = 0, destmtu = 0;
#ifdef IPSEC
struct tdb *tdb = NULL;
#endif /* IPSEC */
@@ -117,9 +124,7 @@ ip6_forward(struct mbuf *m, struct route
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on interface %u\n",
- src6, dst6,
- ip6->ip6_nxt,
- m->m_pkthdr.ph_ifidx);
+ src6, dst6, ip6->ip6_nxt, ifidx);
}
m_freem(m);
goto done;
@@ -138,11 +143,30 @@ ip6_forward(struct mbuf *m, struct route
* we need to generate an ICMP6 message to the src.
* Thanks to M_EXT, in most cases copy will not occur.
*
+ * For final protocol header like TCP or UDP, full header chain in
+ * ICMP6 packet is not necessary. In this case only copy small
+ * part of original packet and save it on stack instead of mbuf.
+ * Although this violates RFC, it avoids additional mbuf allocations.
+ * Also pf nat and rdr do not affect the shared mbuf cluster.
+ *
* It is important to save it before IPsec processing as IPsec
* processing may modify the mbuf.
*/
- mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN),
- M_NOWAIT);
+ switch (ip6->ip6_nxt) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ icmp_len = min(m->m_pkthdr.len, sizeof(icmp_buf));
+ mflags = m->m_flags;
+ pfflags = m->m_pkthdr.pf.flags;
+ m_copydata(m, 0, icmp_len, icmp_buf);
+ mcopy = NULL;
+ break;
+ default:
+ icmp_len = min(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN);
+ mcopy = m_copym(m, 0, icmp_len, M_NOWAIT);
+ icmp_len = 0;
+ break;
+ }
#if NPF > 0
reroute:
@@ -174,12 +198,10 @@ reroute:
m->m_pkthdr.ph_rtableid);
if (rt == NULL) {
ip6stat_inc(ip6s_noroute);
- if (mcopy != NULL) {
- icmp6_error(mcopy, ICMP6_DST_UNREACH,
- ICMP6_DST_UNREACH_NOROUTE, 0);
- }
+ type = ICMP6_DST_UNREACH;
+ code = ICMP6_DST_UNREACH_NOROUTE;
m_freem(m);
- goto done;
+ goto icmperror;
}
dst = &ro->ro_dstsa;
@@ -190,7 +212,7 @@ reroute:
* unreachable error with Code 2 (beyond scope of source address).
* [draft-ietf-ipngwg-icmp-v3-00.txt, Section 3.1]
*/
- if (in6_addr2scopeid(m->m_pkthdr.ph_ifidx, &ip6->ip6_src) !=
+ if (in6_addr2scopeid(ifidx, &ip6->ip6_src) !=
in6_addr2scopeid(rt->rt_ifidx, &ip6->ip6_src)) {
time_t uptime;
@@ -205,15 +227,12 @@ reroute:
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %u, outif %u\n",
- src6, dst6,
- ip6->ip6_nxt,
- m->m_pkthdr.ph_ifidx, rt->rt_ifidx);
+ src6, dst6, ip6->ip6_nxt, ifidx, rt->rt_ifidx);
}
- if (mcopy != NULL)
- icmp6_error(mcopy, ICMP6_DST_UNREACH,
- ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
+ type = ICMP6_DST_UNREACH;
+ code = ICMP6_DST_UNREACH_BEYONDSCOPE;
m_freem(m);
- goto done;
+ goto icmperror;
}
#ifdef IPSEC
@@ -248,7 +267,7 @@ reroute:
m_freem(m);
goto freecopy;
}
- if (rt->rt_ifidx == m->m_pkthdr.ph_ifidx &&
+ if (rt->rt_ifidx == ifidx &&
ip6_sendredirects && !ISSET(flags, IPV6_REDIRECT) &&
(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
if ((ifp->if_flags & IFF_POINTOPOINT) &&
@@ -268,11 +287,10 @@ reroute:
* type/code is based on suggestion by Rich Draves.
* not sure if it is the best pick.
*/
- if (mcopy != NULL)
- icmp6_error(mcopy, ICMP6_DST_UNREACH,
- ICMP6_DST_UNREACH_ADDR, 0);
+ type = ICMP6_DST_UNREACH;
+ code = ICMP6_DST_UNREACH_ADDR;
m_freem(m);
- goto done;
+ goto icmperror;
}
type = ND_REDIRECT;
}
@@ -323,27 +341,40 @@ reroute:
if (error || m == NULL)
goto senderr;
- if (mcopy != NULL)
- icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
+ type = ICMP6_PACKET_TOO_BIG;
+ destmtu = ifp->if_mtu;
m_freem(m);
- goto done;
+ goto icmperror;
senderr:
- if (mcopy == NULL)
+ if (mcopy == NULL && icmp_len == 0)
goto done;
switch (error) {
case 0:
if (type == ND_REDIRECT) {
- icmp6_redirect_output(mcopy, rt);
- ip6stat_inc(ip6s_redirectsent);
+ if (icmp_len != 0) {
+ mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (mcopy == NULL)
+ goto done;
+ mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
+ mcopy->m_flags |= (mflags & M_COPYFLAGS);
+ mcopy->m_pkthdr.ph_rtableid = rtableid;
+ mcopy->m_pkthdr.ph_ifidx = ifidx;
+ mcopy->m_pkthdr.pf.flags |=
+ (pfflags & PF_TAG_GENERATED);
+ memcpy(mcopy->m_data, icmp_buf, icmp_len);
+ }
+ if (mcopy != NULL) {
+ icmp6_redirect_output(mcopy, rt);
+ ip6stat_inc(ip6s_redirectsent);
+ }
goto done;
}
goto freecopy;
case EMSGSIZE:
type = ICMP6_PACKET_TOO_BIG;
- code = 0;
if (rt != NULL) {
if (rt->rt_mtu) {
destmtu = rt->rt_mtu;
@@ -381,7 +412,20 @@ senderr:
code = ICMP6_DST_UNREACH_ADDR;
break;
}
- icmp6_error(mcopy, type, code, destmtu);
+ icmperror:
+ if (icmp_len != 0) {
+ mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (mcopy == NULL)
+ goto done;
+ mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
+ mcopy->m_flags |= (mflags & M_COPYFLAGS);
+ mcopy->m_pkthdr.ph_rtableid = rtableid;
+ mcopy->m_pkthdr.ph_ifidx = ifidx;
+ mcopy->m_pkthdr.pf.flags |= (pfflags & PF_TAG_GENERATED);
+ memcpy(mcopy->m_data, icmp_buf, icmp_len);
+ }
+ if (mcopy != NULL)
+ icmp6_error(mcopy, type, code, destmtu);
goto done;
freecopy:
ip6 forward mcopy