Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: limit softnet threads to number of cpu
To:
tech@openbsd.org
Date:
Sat, 30 Aug 2025 00:13:42 +0200

Download raw body.

Thread
On Sat, Aug 09, 2025 at 05:07:29PM +0200, Alexander Bluhm wrote:
> On Sat, Aug 09, 2025 at 02:50:32PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 5 Aug 2025 13:34:45 +0200
> > > From: Alexander Bluhm <bluhm@openbsd.org>
> > 
> > I didn't think too deeply about this yet, but creating the threads and
> > then killing them seems a little bit silly.  So a few ideas:
> 
> The threads are not really killed.  The task is created, threads
> are defered.  Before the threads are created, needless tasks are
> destroyed.  At least i understand the code this way.
> 
> > * By the time we actually start running other kernel threads, the
> >   number of cCPUs that attached is know.  So I think the softnet
> >   threads can/should be created later that you do now.
> 
> The problem are the tasks.  The interface driver attach code calls
> ifq_init() for each queue.  And here each softnet task is linked
> to the queue.  Task must exist when driveres are in autoconf.
> 
> > * Another strategy is to create one softnet thread up front and then
> >   create more (if necessary) later instead of destroying them again.
> 
> And how to get that into the attch code of every multiqueue driver?
> 
> bluhm
> 
> > > On Sat, Aug 02, 2025 at 10:18:11PM +0200, Alexander Bluhm wrote:
> > > > Hi,
> > > > 
> > > > Currently always 8 softnet threads are startet, but only up to
> > > > number of CPU are used.  I would like to remove useless threads.
> > > > 
> > > > Problem is that softnet tasks must be initialized before autoconf
> > > > is running.  Drivers need them to attach queues.  But number of CPU
> > > > is known only after autoconf has discovert them.
> > > > 
> > > > So I split the code into softnet_init() and softnet_percpu().  The
> > > > latter removes task queues that are not needed.  Threads have not
> > > > been created yet.
> > > > 
> > > > I have a follow up diff that pins softnet threads to specific CPU.
> > > > But that needs more testing.  I would like to commit this thread
> > > > handling diff first.
> > > > 
> > > > ok?
> > > 
> > > Anyone?  Or a better idea how to avoid useless threads?
> > > 
> > > bluhm

As my diff calls softnet_percpu() way before kthread_run_deferred_queue()
within main(), no needless threads are created.  I could not find
any better way to avoid these useless threads.  Noone needs more
softnet threads than CPUs.

ok?

bluhm

Index: kern/init_main.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/init_main.c,v
diff -u -p -r1.329 init_main.c
--- kern/init_main.c	9 Jun 2025 10:57:46 -0000	1.329
+++ kern/init_main.c	29 Aug 2025 22:03:14 -0000
@@ -328,6 +328,7 @@ main(void *framep)
 
 	/* Initialize the interface/address trees */
 	ifinit();
+	softnet_init();
 
 	/* Lock the kernel on behalf of proc0. */
 	KERNEL_LOCK();
@@ -345,6 +346,9 @@ main(void *framep)
 
 	/* Per CPU memory allocation */
 	percpu_init();
+
+	/* Reduce softnet threads to number of CPU */
+	softnet_percpu();
 
 	/* Initialize the file systems. */
 #if defined(NFSSERVER) || defined(NFSCLIENT)
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.740 if.c
--- net/if.c	21 Jul 2025 20:36:41 -0000	1.740
+++ net/if.c	29 Aug 2025 22:03:14 -0000
@@ -237,7 +237,11 @@ struct softnet {
 	struct taskq	*sn_taskq;
 	struct netstack	 sn_netstack;
 } __aligned(64);
+#ifdef MULTIPROCESSOR
 #define NET_TASKQ	8
+#else
+#define NET_TASKQ	1
+#endif
 struct softnet	softnets[NET_TASKQ];
 
 struct task	if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -255,16 +259,22 @@ struct rwlock netlock = RWLOCK_INITIALIZ
 void
 ifinit(void)
 {
-	unsigned int	i;
-
 	/*
 	 * most machines boot with 4 or 5 interfaces, so size the initial map
 	 * to accommodate this
 	 */
 	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
+}
 
+void
+softnet_init(void)
+{
+	unsigned int i;
+
+	/* Number of CPU is unknown, but driver attach needs softnet tasks. */
 	for (i = 0; i < NET_TASKQ; i++) {
 		struct softnet *sn = &softnets[i];
+
 		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
 		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
 		    TASKQ_MPSAFE);
@@ -273,6 +283,31 @@ ifinit(void)
 	}
 }
 
+#ifdef MULTIPROCESSOR
+
+void
+softnet_percpu(void)
+{
+	unsigned int i;
+
+	/* After attaching all CPUs and interfaces, remove useless threads. */
+	for (i = softnet_count(); i < NET_TASKQ; i++) {
+		struct softnet *sn = &softnets[i];
+
+		taskq_destroy(sn->sn_taskq);
+		sn->sn_taskq = NULL;
+	}
+}
+
+#else /* MULTIPROCESSOR */
+
+void
+softnet_percpu(void)
+{
+}
+
+#endif /* MULTIPROCESSOR */
+
 static struct if_idxmap if_idxmap;
 
 /*
@@ -3642,10 +3677,10 @@ unhandled_af(int af)
 	panic("unhandled af %d", af);
 }
 
-int
-net_sn_count(void)
+unsigned int
+softnet_count(void)
 {
-	static int nsoftnets;
+	static unsigned int nsoftnets;
 
 	if (nsoftnets == 0)
 		nsoftnets = min(NET_TASKQ, ncpus);
@@ -3656,7 +3691,7 @@ net_sn_count(void)
 struct softnet *
 net_sn(unsigned int ifindex)
 {
-	return (&softnets[ifindex % net_sn_count()]);
+	return (&softnets[ifindex % softnet_count()]);
 }
 
 struct taskq *
@@ -3672,7 +3707,7 @@ net_tq_barriers(const char *wmesg)
 	struct refcnt r = REFCNT_INITIALIZER();
 	int i;
 
-	for (i = 0; i < nitems(barriers); i++) {
+	for (i = 0; i < softnet_count(); i++) {
 		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
 		refcnt_take(&r);
 		task_add(softnets[i].sn_taskq, &barriers[i]);
Index: net/if.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v
diff -u -p -r1.220 if.h
--- net/if.h	19 Jul 2025 16:40:40 -0000	1.220
+++ net/if.h	29 Aug 2025 22:03:14 -0000
@@ -561,10 +561,13 @@ void	if_congestion(void);
 int	if_congested(void);
 __dead void	unhandled_af(int);
 int	if_setlladdr(struct ifnet *, const uint8_t *);
+void	softnet_init(void);
+void	softnet_percpu(void);
+unsigned int
+	softnet_count(void);
 struct taskq *
 	net_tq(unsigned int);
 void	net_tq_barriers(const char *);
-int	net_sn_count(void);
 
 #endif /* _KERNEL */
 
Index: net/if_loop.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
diff -u -p -r1.102 if_loop.c
--- net/if_loop.c	7 Jul 2025 02:28:50 -0000	1.102
+++ net/if_loop.c	29 Aug 2025 22:03:14 -0000
@@ -178,8 +178,8 @@ loop_clone_create(struct if_clone *ifc, 
 		rtable_l2set(0, 0, ifp->if_index);
 	} else
 		if_attach(ifp);
-	if_attach_queues(ifp, net_sn_count());
-	if_attach_iqueues(ifp, net_sn_count());
+	if_attach_queues(ifp, softnet_count());
+	if_attach_iqueues(ifp, softnet_count());
 	if_alloc_sadl(ifp);
 #if NBPFILTER > 0
 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));