From: Vitaliy Makkoveev <mvs@openbsd.org>
Subject: sysctl: unlock IPCTL_IPPORT_MAXQUEUE case of ip_sysctl()
To: Alexander Bluhm <bluhm@openbsd.org>, tech@openbsd.org
Date: Mon, 30 Jun 2025 23:45:05 +0300

Even now, `ip_maxqueue' can become less than (ip_frags + 1) just after
the netlock is released, so there is no need to serialize `ip_frags'
and `ip_maxqueue' with `ipq_mutex'; a value cached at the start of the
ip_fragcheck() run is enough.
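
For reference, the pattern the diff applies in ip_fragcheck() boils
down to the sketch below (simplified, drop path collapsed into a
comment; not a drop-in hunk):

	/*
	 * Load `ip_maxqueue' once per ip_fragcheck() run and pass the
	 * snapshot to ip_flush(), so the overflow check and the 75%
	 * drain target agree even if a concurrent sysctl write changes
	 * `ip_maxqueue' in between.
	 */
	int maxqueue = atomic_load_int(&ip_maxqueue);	/* unlocked snapshot */

	if (ip_frags + 1 > maxqueue) {	/* `ip_frags' stays under ipq_mutex */
		ip_flush(maxqueue);	/* drain against the same snapshot */
		/* count ips_rcvmemdrop and drop the fragment */
	}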

ok?

Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.416 ip_input.c
--- sys/netinet/ip_input.c	30 Jun 2025 12:43:22 -0000	1.416
+++ sys/netinet/ip_input.c	30 Jun 2025 13:35:36 -0000
@@ -86,8 +86,9 @@
 /*
  * Locks used to protect global variables in this file:
  *	I	immutable after creation
- *	a	atomic operations
  *	N	net lock
+ *	a	atomic operations
+ *	q	ipq_mutex
  */
 
 /* values controllable via sysctl */
@@ -101,15 +102,14 @@ int	ip_mtudisc = 1;				/* [a] */
 int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;	/* [a] */
 int	ip_directedbcast = 0;			/* [a] */
 
-/* Protects `ipq' and `ip_frags'. */
 struct mutex	ipq_mutex = MUTEX_INITIALIZER(IPL_SOFTNET);
 
 /* IP reassembly queue */
-LIST_HEAD(, ipq) ipq;
+LIST_HEAD(, ipq) ipq;				/* [q] */
 
 /* Keep track of memory used for reassembly */
-int	ip_maxqueue = 300;
-int	ip_frags = 0;
+int	ip_maxqueue = 300;			/* [a] */
+int	ip_frags = 0;				/* [q] */
 
 #ifndef SMALL_KERNEL
 const struct sysctl_bounded_args ipctl_vars_unlocked[] = {
@@ -124,10 +124,10 @@ const struct sysctl_bounded_args ipctl_v
 	{ IPCTL_IPPORT_LASTAUTO, &ipport_lastauto, 0, 65535 },
 	{ IPCTL_IPPORT_HIFIRSTAUTO, &ipport_hifirstauto, 0, 65535 },
 	{ IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
+	{ IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
 };
 
 const struct sysctl_bounded_args ipctl_vars[] = {
-	{ IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
 	{ IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
 	{ IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
 	{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
@@ -156,7 +156,7 @@ int	in_ouraddr(struct mbuf *, struct ifn
 int		ip_fragcheck(struct mbuf **, int *);
 struct mbuf *	ip_reass(struct ipqent *, struct ipq *);
 void		ip_freef(struct ipq *);
-void		ip_flush(void);
+void		ip_flush(int);
 
 static void ip_send_dispatch(void *);
 static void ip_sendraw_dispatch(void *);
@@ -698,9 +698,11 @@ ip_fragcheck(struct mbuf **mp, int *offp
 		 * attempt reassembly; if it succeeds, proceed.
 		 */
 		if (mff || ip->ip_off) {
+			int ip_maxqueue_local = atomic_load_int(&ip_maxqueue);
+
 			ipstat_inc(ips_fragments);
-			if (ip_frags + 1 > ip_maxqueue) {
-				ip_flush();
+			if (ip_frags + 1 > ip_maxqueue_local) {
+				ip_flush(ip_maxqueue_local);
 				ipstat_inc(ips_rcvmemdrop);
 				goto bad;
 			}
@@ -1171,13 +1173,13 @@ ip_slowtimo(void)
  * Flush a bunch of datagram fragments, till we are down to 75%.
  */
 void
-ip_flush(void)
+ip_flush(int maxqueue)
 {
 	int max = 50;
 
 	MUTEX_ASSERT_LOCKED(&ipq_mutex);
 
-	while (!LIST_EMPTY(&ipq) && ip_frags > ip_maxqueue * 3 / 4 && --max) {
+	while (!LIST_EMPTY(&ipq) && ip_frags > maxqueue * 3 / 4 && --max) {
 		ipstat_inc(ips_fragdropped);
 		ip_freef(LIST_FIRST(&ipq));
 	}
@@ -1841,6 +1843,7 @@ ip_sysctl(int *name, u_int namelen, void
 	case IPCTL_IPPORT_LASTAUTO:
 	case IPCTL_IPPORT_HIFIRSTAUTO:
 	case IPCTL_IPPORT_HILASTAUTO:
+	case IPCTL_IPPORT_MAXQUEUE:
 		return (sysctl_bounded_arr(
 		    ipctl_vars_unlocked, nitems(ipctl_vars_unlocked),
 		    name, namelen, oldp, oldlenp, newp, newlen));