Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: Please test: wg(4): drop "while (!ifq_empty())" hack in wg_peer_destroy()
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 19 Jan 2024 15:17:28 +0100

Download raw body.

Thread
On Fri, Jan 19, 2024 at 05:07:39PM +0300, Vitaliy Makkoveev wrote:
> Hi,
> 
> wg(4) stores reference to peer in the outgoing mbuf. Since it doesn't
> use reference counting, to make the `peer' dereference safe within
> wg_qstart(), wg_peer_destroy() has the following delay loop:
> 
>         NET_LOCK();
>         while (!ifq_empty(&sc->sc_if.if_snd)) {
>                 /*
>                  * XXX: `if_snd' of stopped interface could still
>                  * contain packets
>                  */
>                 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
>                         ifq_purge(&sc->sc_if.if_snd);
>                         continue;
>                 }
>                 NET_UNLOCK();
>                 tsleep_nsec(&nowake, PWAIT, "wg_ifq", 1000);
>                 NET_LOCK();
>         }
>         NET_UNLOCK();
> 
> Regardless on exclusive netlock acquisistion which stops packet
> processing, this loop also could became infinite on huge traffic. I
> want to remove it. Unfortunately, we can't use reference counting for
> outgoing packets because they could stuck within `if_snd' of stopped
> interface. These packets will be removed on interface destruction, so
> the reference of `peer' will never be released.
> 
> There is another way: to do wg_aip_lookup() for each packet within
> wg_qstart(). This diff implements it. Obviously, wg_aip_lookup()
> provides some performance impact, so I'm asking for help with testing
> this diff on configuration with huge amount of peers and traffic
> pressure. I'm interesting what is the performance impact and is the
> performance impact acceptable. Of course any other help with testing is
> also welcomed.
> 
> Also, this diff restores pf(4) delay option for wg(4) interfaces.
> 
> Index: sys/net/if_wg.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_wg.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 if_wg.c
> --- sys/net/if_wg.c	18 Jan 2024 08:46:41 -0000	1.36
> +++ sys/net/if_wg.c	19 Jan 2024 13:45:06 -0000
> @@ -507,22 +507,6 @@ wg_peer_destroy(struct wg_peer *peer)
>  
>  	noise_remote_clear(&peer->p_remote);
>  
> -	NET_LOCK();
> -	while (!ifq_empty(&sc->sc_if.if_snd)) {
> -		/*
> -		 * XXX: `if_snd' of stopped interface could still
> -		 * contain packets
> -		 */
> -		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
> -			ifq_purge(&sc->sc_if.if_snd);
> -			continue;
> -		}
> -		NET_UNLOCK();
> -		tsleep_nsec(&nowake, PWAIT, "wg_ifq", 1000);
> -		NET_LOCK();
> -	}
> -	NET_UNLOCK();
> -
>  	taskq_barrier(wg_crypt_taskq);
>  	taskq_barrier(net_tq(sc->sc_if.if_index));
>  
> @@ -2092,20 +2076,39 @@ wg_qstart(struct ifqueue *ifq)
>  	struct ifnet		*ifp = ifq->ifq_if;
>  	struct wg_softc		*sc = ifp->if_softc;
>  	struct wg_peer		*peer;
> -	struct wg_tag		*t;
>  	struct mbuf		*m;
>  	SLIST_HEAD(,wg_peer)	 start_list;
>  
>  	SLIST_INIT(&start_list);
>  
> +	rw_enter_read(&sc->sc_lock);
>  	/*
>  	 * We should be OK to modify p_start_list, p_start_onlist in this
>  	 * function as there should only be one ifp->if_qstart invoked at a
>  	 * time.
>  	 */
>  	while ((m = ifq_dequeue(ifq)) != NULL) {
> -		t = wg_tag_get(m);
> -		peer = t->t_peer;
> +		switch (m->m_pkthdr.ph_family) {
> +		case AF_INET:
> +			peer = wg_aip_lookup(sc->sc_aip4,
> +			    &mtod(m, struct ip *)->ip_dst);
> +			break;
> +#ifdef INET6
> +		case AF_INET6:
> +			peer = wg_aip_lookup(sc->sc_aip6,
> +			    &mtod(m, struct ip6_hdr *)->ip6_dst);
> +			break;
> +#endif
> +		default:
> +			m_freem(m);
> +			continue;
> +		}
> +
> +		if (peer == NULL) {
> +			m_freem(m);
> +			continue;
> +		}
> +
>  		if (mq_push(&peer->p_stage_queue, m) != 0)
>  			counters_inc(ifp->if_counters, ifc_oqdrops);
>  		if (!peer->p_start_onlist) {
> @@ -2120,6 +2123,8 @@ wg_qstart(struct ifqueue *ifq)
>  			wg_timers_event_want_initiation(&peer->p_timers);
>  		peer->p_start_onlist = 0;
>  	}
> +	rw_exit_read(&sc->sc_lock);
> +
>  	task_add(wg_crypt_taskq, &sc->sc_encap);
>  }
>  
> @@ -2175,19 +2180,6 @@ wg_output(struct ifnet *ifp, struct mbuf
>  	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
>  		DPRINTF(sc, "Packet looped\n");
>  		ret = ELOOP;
> -		goto error;
> -	}
> -
> -	/*
> -	 * As we hold a reference to peer in the mbuf, we can't handle a
> -	 * delayed packet without doing some refcnting. If a peer is removed
> -	 * while a delayed holds a reference, bad things will happen. For the
> -	 * time being, delayed packets are unsupported. This may be fixed with
> -	 * another aip_lookup in wg_qstart, or refcnting as mentioned before.
> -	 */
> -	if (m->m_pkthdr.pf.delay > 0) {
> -		DPRINTF(sc, "PF delay unsupported\n");
> -		ret = EOPNOTSUPP;
>  		goto error;
>  	}
>  
> 

I would prefer if you actually removed t_peer from the wg_tag. At least
then it is clear that nothing depends on this hack anymore.

-- 
:wq Claudio