From: Claudio Jeker Subject: Re: refactor mpls interface input To: David Gwynne Cc: tech@openbsd.org Date: Tue, 25 Nov 2025 10:29:52 +0100 On Tue, Nov 25, 2025 at 12:32:22PM +1000, David Gwynne wrote: > this hoists the mpls interface input processing up a bit in the mpls > label processing. > > mpw, mpip, and mpe all add entries to the local mpls fib that points to > themselves, and when these labels are "output" via these interfaces they > then go and push the packets into their input processing. this is all > boilerplate, so it can be factored out and better integrated into the > larger network stack. in particular, we can pass struct netstack through > to the input handlers. > > there's some small downsides to this. the main one is that using > if_vinput to dispatch to their input handlers means the vinput handling > has to cope with mpls encapsulated packets. this is easy except for mpw, > where ether_ifattach does a lot of setup that has to be tweaked for mpls > encapsualted ethernet packets. I think that is fine. While the setup becomes more complex it is done just once. > while here, this changes mpe output so it doesnt have to prepend the > mbuf with the sockaddr it uses as the nexthop on the underlay. it only > had to do that to carry the information across the ifq. if we just don't > use ifq for output then this gets simplified a lot. the only downside is > that you can't use altq on mpe interfaces after this. i dont think this > is a huge loss. I'm fine with this loss. The result is a lot cleaner and simpler so right now I think this is better. > ok? Still untested but OK claudio@ > Index: net/if_mpe.c > =================================================================== > RCS file: /cvs/src/sys/net/if_mpe.c,v > diff -u -p -r1.107 if_mpe.c > --- net/if_mpe.c 7 Jul 2025 02:28:50 -0000 1.107 > +++ net/if_mpe.c 25 Nov 2025 00:58:24 -0000 > @@ -52,6 +52,7 @@ > > struct mpe_softc { > struct ifnet sc_if; /* the interface */ > + caddr_t sc_bpf; > int sc_txhprio; > int sc_rxhprio; > unsigned int sc_rdomain; > @@ -73,7 +74,7 @@ int mpe_ioctl(struct ifnet *, u_long, ca > void mpe_start(struct ifnet *); > int mpe_clone_create(struct if_clone *, int); > int mpe_clone_destroy(struct ifnet *); > -void mpe_input(struct ifnet *, struct mbuf *); > +void mpe_input(struct ifnet *, struct mbuf *, struct netstack *); > > struct if_clone mpe_cloner = > IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy); > @@ -106,8 +107,8 @@ mpe_clone_create(struct if_clone *ifc, i > ifp->if_softc = sc; > ifp->if_mtu = MPE_MTU; > ifp->if_ioctl = mpe_ioctl; > - ifp->if_bpf_mtap = p2p_bpf_mtap; > - ifp->if_input = p2p_input; > + ifp->if_bpf_mtap = bpf_mtap; > + ifp->if_input = mpe_input; > ifp->if_output = mpe_output; > ifp->if_start = mpe_start; > ifp->if_type = IFT_MPLS; > @@ -120,7 +121,8 @@ mpe_clone_create(struct if_clone *ifc, i > if_alloc_sadl(ifp); > > #if NBPFILTER > 0 > - bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); > + bpfattach(&sc->sc_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); > + bpfattach(&ifp->if_bpf, ifp, DLT_MPLS, 0); > #endif > > sc->sc_txhprio = 0; > @@ -167,76 +169,31 @@ mpe_clone_destroy(struct ifnet *ifp) > void > mpe_start(struct ifnet *ifp) > { > - struct mpe_softc *sc = ifp->if_softc; > - struct mbuf *m; > - struct sockaddr *sa; > - struct sockaddr smpls = { .sa_family = AF_MPLS }; > - struct rtentry *rt; > - struct ifnet *ifp0; > - > - while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { > - sa = mtod(m, struct sockaddr *); > - rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain); > - if (!rtisvalid(rt)) { > - m_freem(m); > - rtfree(rt); > - continue; > - } > - > - ifp0 = if_get(rt->rt_ifidx); > - if (ifp0 == NULL) { > - m_freem(m); > - rtfree(rt); > - continue; > - } > - > - m_adj(m, sa->sa_len); > - > -#if NBPFILTER > 0 > - if (ifp->if_bpf) { > - /* remove MPLS label before passing packet to bpf */ > - m->m_data += sizeof(struct shim_hdr); > - m->m_len -= sizeof(struct shim_hdr); > - m->m_pkthdr.len -= sizeof(struct shim_hdr); > - bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, > - m, BPF_DIRECTION_OUT); > - m->m_data -= sizeof(struct shim_hdr); > - m->m_len += sizeof(struct shim_hdr); > - m->m_pkthdr.len += sizeof(struct shim_hdr); > - } > -#endif > - > - m->m_pkthdr.ph_rtableid = sc->sc_rdomain; > - CLR(m->m_flags, M_BCAST|M_MCAST); > - > - mpls_output(ifp0, m, &smpls, rt); > - if_put(ifp0); > - rtfree(rt); > - } > + ifq_purge(&ifp->if_snd); > } > > int > mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > - struct rtentry *rt) > + struct rtentry *rt) > { > - struct mpe_softc *sc; > + struct mpe_softc *sc = ifp->if_softc; > struct rt_mpls *rtmpls; > + struct ifnet *ifp0 = NULL; > + struct rtentry *rt0 = NULL; > + struct sockaddr smpls = { .sa_family = AF_MPLS }; > struct shim_hdr shim; > - int error; > int txprio; > uint8_t ttl = mpls_defttl; > uint8_t tos, prio; > size_t ttloff; > - socklen_t slen; > + int error; > +#if NBPFILTER > 0 > + caddr_t if_bpf; > +#endif > > if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { > - m_freem(m); > - return (ENETUNREACH); > - } > - > - if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) { > - mpe_input(ifp, m); > - return (0); > + error = ENETUNREACH; > + goto drop; > } > > #ifdef DIAGNOSTIC > @@ -249,8 +206,8 @@ mpe_output(struct ifnet *ifp, struct mbu > > rtmpls = (struct rt_mpls *)rt->rt_llinfo; > if (rtmpls->mpls_operation != MPLS_OP_PUSH) { > - m_freem(m); > - return (ENETUNREACH); > + error = ENETUNREACH; > + goto drop; > } > > error = 0; > @@ -259,7 +216,6 @@ mpe_output(struct ifnet *ifp, struct mbu > struct ip *ip = mtod(m, struct ip *); > tos = ip->ip_tos; > ttloff = offsetof(struct ip, ip_ttl); > - slen = sizeof(struct sockaddr_in); > break; > } > #ifdef INET6 > @@ -268,15 +224,35 @@ mpe_output(struct ifnet *ifp, struct mbu > uint32_t flow = bemtoh32(&ip6->ip6_flow); > tos = flow >> 20; > ttloff = offsetof(struct ip6_hdr, ip6_hlim); > - slen = sizeof(struct sockaddr_in6); > break; > } > #endif > default: > - m_freem(m); > - return (EPFNOSUPPORT); > + error = EPFNOSUPPORT; > + goto drop; > + } > + > + rt0 = rtalloc(rt->rt_gateway, RT_RESOLVE, sc->sc_rdomain); > + if (!rtisvalid(rt0)) { > + error = EHOSTUNREACH; > + goto rtfree; > } > > + ifp0 = if_get(rt0->rt_ifidx); > + if (ifp0 == NULL) { > + error = ENETUNREACH; > + goto rtfree; > + } > + > + m->m_pkthdr.ph_rtableid = sc->sc_rdomain; > + CLR(m->m_flags, M_BCAST|M_MCAST); > + > +#if NBPFILTER > 0 > + if_bpf = sc->sc_bpf; > + if (if_bpf) > + bpf_mtap_af(if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT); > +#endif > + > if (mpls_mapttl_ip) { > /* assumes the ip header is already contig */ > ttl = *(mtod(m, uint8_t *) + ttloff); > @@ -302,25 +278,27 @@ mpe_output(struct ifnet *ifp, struct mbu > > m = m_prepend(m, sizeof(shim), M_NOWAIT); > if (m == NULL) { > - error = ENOMEM; > - goto out; > + error = ENOBUFS; > + goto ifput; > } > *mtod(m, struct shim_hdr *) = shim; > > - m = m_prepend(m, slen, M_WAITOK); > - if (m == NULL) { > - error = ENOMEM; > - goto out; > - } > - memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen); > - mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */ > +#if NBPFILTER > 0 > + if_bpf = ifp->if_bpf; > + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); > +#endif > > - m->m_pkthdr.ph_family = dst->sa_family; > + mpls_output(ifp0, m, &smpls, rt0); > + if_put(ifp0); > + rtfree(rt0); > + return (0); > > - error = if_enqueue(ifp, m); > -out: > - if (error) > - ifp->if_oerrors++; > +ifput: > + if_put(ifp0); > +rtfree: > + rtfree(rt0); > +drop: > + m_freem(m); > return (error); > } > > @@ -457,7 +435,7 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd, > } > > void > -mpe_input(struct ifnet *ifp, struct mbuf *m) > +mpe_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns) > { > struct mpe_softc *sc = ifp->if_softc; > struct shim_hdr *shim; > @@ -465,6 +443,9 @@ mpe_input(struct ifnet *ifp, struct mbuf > uint8_t ttl, tos; > uint32_t exp; > int rxprio = sc->sc_rxhprio; > +#if NBPFILTER > 0 > + caddr_t if_bpf; > +#endif > > shim = mtod(m, struct shim_hdr *); > exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET; > @@ -543,7 +524,15 @@ mpe_input(struct ifnet *ifp, struct mbuf > break; > } > > - if_vinput(ifp, m, NULL); > +#if NBPFILTER > 0 > + if_bpf = sc->sc_bpf; > + if (if_bpf) { > + bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, > + BPF_DIRECTION_IN); > + } > +#endif > + > + p2p_input(ifp, m, ns); > return; > drop: > m_freem(m); > Index: net/if_mpip.c > =================================================================== > RCS file: /cvs/src/sys/net/if_mpip.c,v > diff -u -p -r1.20 if_mpip.c > --- net/if_mpip.c 2 Mar 2025 21:28:32 -0000 1.20 > +++ net/if_mpip.c 25 Nov 2025 00:58:24 -0000 > @@ -52,6 +52,7 @@ struct mpip_neighbor { > > struct mpip_softc { > struct ifnet sc_if; > + caddr_t sc_bpf; > unsigned int sc_dead; > uint32_t sc_flow; /* xor for mbuf flowid */ > > @@ -71,6 +72,7 @@ void mpipattach(int); > int mpip_clone_create(struct if_clone *, int); > int mpip_clone_destroy(struct ifnet *); > int mpip_ioctl(struct ifnet *, u_long, caddr_t); > +void mpip_input(struct ifnet *, struct mbuf *, struct netstack *); > int mpip_output(struct ifnet *, struct mbuf *, struct sockaddr *, > struct rtentry *); > void mpip_start(struct ifnet *); > @@ -112,8 +114,8 @@ mpip_clone_create(struct if_clone *ifc, > ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; > ifp->if_xflags = IFXF_CLONED; > ifp->if_ioctl = mpip_ioctl; > - ifp->if_bpf_mtap = p2p_bpf_mtap; > - ifp->if_input = p2p_input; > + ifp->if_bpf_mtap = bpf_mtap; > + ifp->if_input = mpip_input; > ifp->if_output = mpip_output; > ifp->if_start = mpip_start; > ifp->if_rtrequest = p2p_rtrequest; > @@ -125,7 +127,8 @@ mpip_clone_create(struct if_clone *ifc, > if_alloc_sadl(ifp); > > #if NBPFILTER > 0 > - bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); > + bpfattach(&sc->sc_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); > + bpfattach(&ifp->if_bpf, ifp, DLT_MPLS, 0); > #endif > > refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); > @@ -462,14 +465,17 @@ mpip_ioctl(struct ifnet *ifp, u_long cmd > return (error); > } > > -static void > -mpip_input(struct mpip_softc *sc, struct mbuf *m) > +void > +mpip_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns) > { > - struct ifnet *ifp = &sc->sc_if; > + struct mpip_softc *sc = ifp->if_softc; > int rxprio = sc->sc_rxhprio; > uint32_t shim, exp; > struct mbuf *n; > uint8_t ttl, tos; > +#if NBPFILTER > 0 > + caddr_t if_bpf; > +#endif > > if (!ISSET(ifp->if_flags, IFF_RUNNING)) > goto drop; > @@ -608,7 +614,16 @@ mpip_input(struct mpip_softc *sc, struct > break; > } > > - if_vinput(ifp, m, NULL); > +#if NBPFILTER > 0 > + if_bpf = sc->sc_bpf; > + if (if_bpf) { > + if (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, > + m, BPF_DIRECTION_OUT)) > + goto drop; > + } > +#endif /* NBPFILTER */ > + > + p2p_input(ifp, m, ns); > return; > drop: > m_freem(m); > @@ -618,15 +633,8 @@ int > mpip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > struct rtentry *rt) > { > - struct mpip_softc *sc = ifp->if_softc; > int error; > > - if (dst->sa_family == AF_LINK && > - rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) { > - mpip_input(sc, m); > - return (0); > - } > - > if (!ISSET(ifp->if_flags, IFF_RUNNING)) { > error = ENETDOWN; > goto drop; > @@ -691,7 +699,7 @@ mpip_start(struct ifnet *ifp) > > while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { > #if NBPFILTER > 0 > - caddr_t if_bpf = sc->sc_if.if_bpf; > + caddr_t if_bpf = sc->sc_bpf; > if (if_bpf) { > bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, > m, BPF_DIRECTION_OUT); > @@ -795,6 +803,12 @@ mpip_start(struct ifnet *ifp) > > m->m_pkthdr.ph_rtableid = sc->sc_rdomain; > CLR(m->m_flags, M_BCAST|M_MCAST); > + > +#if NBPFILTER > 0 > + if_bpf = ifp->if_bpf; > + if (if_bpf) > + bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT); > +#endif /* NBPFILTER */ > > mpls_output(ifp0, m, (struct sockaddr *)&smpls, rt); > } > Index: net/if_mpw.c > =================================================================== > RCS file: /cvs/src/sys/net/if_mpw.c,v > diff -u -p -r1.68 if_mpw.c > --- net/if_mpw.c 7 Jul 2025 02:28:50 -0000 1.68 > +++ net/if_mpw.c 25 Nov 2025 00:58:24 -0000 > @@ -46,6 +46,7 @@ struct mpw_neighbor { > struct mpw_softc { > struct arpcom sc_ac; > #define sc_if sc_ac.ac_if > + caddr_t sc_bpf; > > int sc_txhprio; > int sc_rxhprio; > @@ -66,9 +67,8 @@ void mpwattach(int); > int mpw_clone_create(struct if_clone *, int); > int mpw_clone_destroy(struct ifnet *); > int mpw_ioctl(struct ifnet *, u_long, caddr_t); > -int mpw_output(struct ifnet *, struct mbuf *, struct sockaddr *, > - struct rtentry *); > void mpw_start(struct ifnet *); > +void mpw_input(struct ifnet *, struct mbuf *, struct netstack *); > > struct if_clone mpw_cloner = > IF_CLONE_INITIALIZER("mpw", mpw_clone_create, mpw_clone_destroy); > @@ -89,35 +89,43 @@ mpw_clone_create(struct if_clone *ifc, i > if (sc == NULL) > return (ENOMEM); > > + ifp = &sc->sc_if; > + > sc->sc_flow = arc4random(); > sc->sc_neighbor = NULL; > > - ifp = &sc->sc_if; > + sc->sc_txhprio = 0; > + sc->sc_rxhprio = IF_HDRPRIO_PACKET; > + sc->sc_rdomain = 0; > + refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); > + sc->sc_ifa.ifa_ifp = ifp; > + sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); > + sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); > + sc->sc_smpls.smpls_family = AF_MPLS; > + > snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", > ifc->ifc_name, unit); > ifp->if_softc = sc; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > ifp->if_xflags = IFXF_CLONED; > ifp->if_ioctl = mpw_ioctl; > - ifp->if_output = mpw_output; > ifp->if_start = mpw_start; > ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; > ether_fakeaddr(ifp); > > sc->sc_dead = 0; > > - if_counters_alloc(ifp); > - if_attach(ifp); > ether_ifattach(ifp); > + ifp->if_input = mpw_input; > > - sc->sc_txhprio = 0; > - sc->sc_rxhprio = IF_HDRPRIO_PACKET; > - sc->sc_rdomain = 0; > - refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); > - sc->sc_ifa.ifa_ifp = ifp; > - sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); > - sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); > - sc->sc_smpls.smpls_family = AF_MPLS; > +#if NBPFILTER > 0 > + bpfdetach(ifp); /* undo bpfattach in ether_ifattach */ > + bpfattach(&sc->sc_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); > + bpfattach(&ifp->if_bpf, ifp, DLT_MPLS, 0); > +#endif > + > + if_counters_alloc(ifp); > + if_attach(ifp); > > return (0); > } > @@ -505,15 +513,18 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd, > return (error); > } > > -static void > -mpw_input(struct mpw_softc *sc, struct mbuf *m) > +void > +mpw_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns) > { > - struct ifnet *ifp = &sc->sc_if; > + struct mpw_softc *sc = ifp->if_softc; > struct shim_hdr *shim; > struct mbuf *n; > uint32_t exp; > int rxprio; > int off; > +#if NBPFILTER > 0 > + caddr_t if_bpf; > +#endif > > if (!ISSET(ifp->if_flags, IFF_RUNNING)) > goto drop; > @@ -613,27 +624,18 @@ mpw_input(struct mpw_softc *sc, struct m > /* packet has not been processed by PF yet. */ > KASSERT(m->m_pkthdr.pf.statekey == NULL); > > - if_vinput(ifp, m, NULL); > +#if NBPFILTER > 0 > + if_bpf = sc->sc_bpf; > + if (if_bpf) > + bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT); > +#endif > + > + ether_input(ifp, m, ns); > return; > drop: > m_freem(m); > } > > -int > -mpw_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, > - struct rtentry *rt) > -{ > - struct mpw_softc *sc = ifp->if_softc; > - > - if (dst->sa_family == AF_LINK && > - rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) { > - mpw_input(sc, m); > - return (0); > - } > - > - return (ether_output(ifp, m, dst, rt)); > -} > - > void > mpw_start(struct ifnet *ifp) > { > @@ -672,8 +674,11 @@ mpw_start(struct ifnet *ifp) > > while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { > #if NBPFILTER > 0 > - if (sc->sc_if.if_bpf) > - bpf_mtap(sc->sc_if.if_bpf, m, BPF_DIRECTION_OUT); > + caddr_t if_bpf; > + > + if_bpf = sc->sc_bpf; > + if (if_bpf) > + bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT); > #endif /* NBPFILTER */ > > m0 = m_get(M_DONTWAIT, m->m_type); > @@ -731,6 +736,12 @@ mpw_start(struct ifnet *ifp) > shim = mtod(m0, struct shim_hdr *); > shim->shim_label = htonl(mpls_defttl) & MPLS_TTL_MASK; > shim->shim_label |= n->n_rshim.shim_label | exp | bos; > + > +#if NBPFILTER > 0 > + if_bpf = ifp->if_bpf; > + if (if_bpf) > + bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT); > +#endif /* NBPFILTER */ > > m0->m_pkthdr.ph_rtableid = sc->sc_rdomain; > CLR(m0->m_flags, M_BCAST|M_MCAST); > Index: netmpls/mpls_input.c > =================================================================== > RCS file: /cvs/src/sys/netmpls/mpls_input.c,v > diff -u -p -r1.80 mpls_input.c > --- netmpls/mpls_input.c 2 Mar 2025 21:28:32 -0000 1.80 > +++ netmpls/mpls_input.c 25 Nov 2025 00:58:24 -0000 > @@ -44,7 +44,8 @@ > #endif > > struct mbuf *mpls_do_error(struct mbuf *, int, int, int); > -void mpls_input_local(struct rtentry *, struct mbuf *); > +static void mpls_input_local(struct rtentry *, struct mbuf *, > + struct netstack *); > > void > mpls_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns) > @@ -186,7 +187,7 @@ do_v6: > switch (rt_mpls->mpls_operation) { > case MPLS_OP_POP: > if (ISSET(rt->rt_flags, RTF_LOCAL)) { > - mpls_input_local(rt, m); > + mpls_input_local(rt, m, ns); > goto done; > } > > @@ -277,8 +278,8 @@ done: > rtfree(rt); > } > > -void > -mpls_input_local(struct rtentry *rt, struct mbuf *m) > +static void > +mpls_input_local(struct rtentry *rt, struct mbuf *m, struct netstack *ns) > { > struct ifnet *ifp; > > @@ -288,12 +289,7 @@ mpls_input_local(struct rtentry *rt, str > return; > } > > - /* shortcut sending out the packet */ > - if (!ISSET(ifp->if_xflags, IFXF_MPLS)) > - (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); > - else > - (*ifp->if_ll_output)(ifp, m, rt->rt_gateway, rt); > - > + if_vinput(ifp, m, ns); > if_put(ifp); > } > > -- :wq Claudio