Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: get rid of the SRP code in src/sys/net/rtsock.c
To:
tech@openbsd.org
Date:
Tue, 19 Aug 2025 15:56:40 +0200

Download raw body.

Thread
On Tue, Aug 19, 2025 at 06:43:42PM +1000, David Gwynne wrote:
> i'm slowly getting rid of SRP code.

Thanks. I think here less is more.
 
> this code uses an SPRL to manage the list of rtsocks that route messages
> can be "recved" on. the obvious replacement would be an SMR list, but
> while you shouldn't sleep with an SRP ref, we do here and you can't do
> that inside an SMR read critical section.
> 
> to avoid the sleep while in the SMR crit section, i'd have to add refcnt
> operations, which means a bunch of atomic ops.

Yes, it gets nasty quickly. Been there a few times.
 
> alternatively, we could do what i did with the ethernet sockets and use
> an rwlock to protect the list of sockets, and take a read lock while
> traversing the list. this reduces the overall number of atomic ops and
> keeps the code in the recv loop much the same.
> 
> the downside is we have to be careful about lock ordering now. the recv
> path takes the list lock, and then the socket lock for queueing the
> messages. the socket setup and destroy code has the socket lock and
> wants to take the rwlock, so it needs to be careful and drop the socket
> lock first.
> 
> does this seem reasonable? ok?

I implemented the SPRL code long time ago as a trial to see if this can be
used to process packets in parallel. Mainly to be extended for UDP and raw
IP but those now use their own strategy using in_pcb_iterator. So there is
no more need for this special snowflake.

I think using a rwlock here is fine even thoug I hate sleeping with such a
global lock. I don't think the code in socket setup and destry are more
complex because of this lock.
 
> Index: rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> diff -u -p -r1.386 rtsock.c
> --- rtsock.c	15 Jul 2025 09:55:49 -0000	1.386
> +++ rtsock.c	8 Aug 2025 05:54:47 -0000
> @@ -70,7 +70,6 @@
>  #include <sys/domain.h>
>  #include <sys/pool.h>
>  #include <sys/protosw.h>
> -#include <sys/srp.h>
>  
>  #include <net/if.h>
>  #include <net/if_dl.h>
> @@ -104,8 +103,6 @@ struct walkarg {
>  };
>  
>  void	route_prinit(void);
> -void	rcb_ref(void *, void *);
> -void	rcb_unref(void *, void *);
>  int	route_output(struct mbuf *, struct socket *);
>  int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
>  int	route_attach(struct socket *, int, int);
> @@ -148,12 +145,13 @@ int		 rt_setsource(unsigned int, const s
>   * Locks used to protect struct members
>   *       I       immutable after creation
>   *       s       solock
> + *
> + * Lock order: rtptable.rtp_lk -> solock
>   */
>  struct rtpcb {
>  	struct socket		*rop_socket;		/* [I] */
>  
> -	SRPL_ENTRY(rtpcb)	rop_list;
> -	struct refcnt		rop_refcnt;
> +	TAILQ_ENTRY(rtpcb)	rop_list;
>  	struct timeout		rop_timeout;
>  	unsigned int		rop_msgfilter;		/* [s] */
>  	unsigned int		rop_flagfilter;		/* [s] */
> @@ -165,8 +163,7 @@ struct rtpcb {
>  #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
>  
>  struct rtptable {
> -	SRPL_HEAD(, rtpcb)	rtp_list;
> -	struct srpl_rc		rtp_rc;
> +	TAILQ_HEAD(, rtpcb)	rtp_list;
>  	struct rwlock		rtp_lk;
>  	unsigned int		rtp_count;
>  };
> @@ -188,35 +185,20 @@ struct rtptable rtptable;
>  void
>  route_prinit(void)
>  {
> -	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
>  	rw_init(&rtptable.rtp_lk, "rtsock");
> -	SRPL_INIT(&rtptable.rtp_list);
> +	TAILQ_INIT(&rtptable.rtp_list);
>  	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
>  	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
>  }
>  
> -void
> -rcb_ref(void *null, void *v)
> -{
> -	struct rtpcb *rop = v;
> -
> -	refcnt_take(&rop->rop_refcnt);
> -}
> -
> -void
> -rcb_unref(void *null, void *v)
> -{
> -	struct rtpcb *rop = v;
> -
> -	refcnt_rele_wake(&rop->rop_refcnt);
> -}
> -
>  int
>  route_attach(struct socket *so, int proto, int wait)
>  {
>  	struct rtpcb	*rop;
>  	int		 error;
>  
> +	soassertlocked(so);
> +
>  	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
>  	if (error)
>  		return (error);
> @@ -233,7 +215,6 @@ route_attach(struct socket *so, int prot
>  	/* Init the timeout structure */
>  	timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
>  	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
> -	refcnt_init(&rop->rop_refcnt);
>  
>  	rop->rop_socket = so;
>  	rop->rop_proto = proto;
> @@ -243,10 +224,15 @@ route_attach(struct socket *so, int prot
>  	soisconnected(so);
>  	so->so_options |= SO_USELOOPBACK;
>  
> +	/* Give up solock before taking rtp_lk for the lock ordering. */
> +	soref(so); /* Take a ref for the list */
> +	sounlock(so);
> +
>  	rw_enter(&rtptable.rtp_lk, RW_WRITE);
> -	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> -	    rop_list);
> +	TAILQ_INSERT_TAIL(&rtptable.rtp_list, rop, rop_list);
>  	rtptable.rtp_count++;
> +
> +	solock(so);
>  	rw_exit(&rtptable.rtp_lk);
>  
>  	return (0);
> @@ -263,20 +249,19 @@ route_detach(struct socket *so)
>  	if (rop == NULL)
>  		return (EINVAL);
>  
> -	rw_enter(&rtptable.rtp_lk, RW_WRITE);
> +	/* Give up solock before taking rtp_lk for the lock ordering. */
> +	sounlock(so);
>  
> +	rw_enter(&rtptable.rtp_lk, RW_WRITE);
>  	rtptable.rtp_count--;
> -	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> -	    rop_list);
> +	TAILQ_REMOVE(&rtptable.rtp_list, rop, rop_list);
>  	rw_exit(&rtptable.rtp_lk);
>  
> -	sounlock(so);
> -
>  	/* wait for all references to drop */
> -	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
>  	timeout_del_barrier(&rop->rop_timeout);
>  
>  	solock(so);
> +	sorele(so); /* Release the ref the list had */
>  
>  	so->so_pcb = NULL;
>  	KASSERT((so->so_state & SS_NOFDREF) == 0);
> @@ -501,7 +486,6 @@ route_input(struct mbuf *m0, struct sock
>  	struct rtpcb *rop;
>  	struct rt_msghdr *rtm;
>  	struct mbuf *m = m0;
> -	struct srp_ref sr;
>  
>  	/* ensure that we can access the rtm_type via mtod() */
>  	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> @@ -509,7 +493,8 @@ route_input(struct mbuf *m0, struct sock
>  		return;
>  	}
>  
> -	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> +	rw_enter_read(&rtptable.rtp_lk);
> +	TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
>  		/*
>  		 * If route socket is bound to an address family only send
>  		 * messages that match the address family. Address family
> @@ -580,7 +565,7 @@ route_input(struct mbuf *m0, struct sock
>  next:
>  		sounlock(so);
>  	}
> -	SRPL_LEAVE(&sr);
> +	rw_exit_read(&rtptable.rtp_lk);
>  
>  	m_freem(m);
>  }
> 

-- 
:wq Claudio