Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
UDP PCB table split IPv4 and IPv6
To:
tech@openbsd.org
Date:
Mon, 8 Jan 2024 09:59:21 +0100

Download raw body.

Thread
Hi,

Currently we have one UDP table for both IPv4 and IPv6 PCBs.
I would like to split it the same way as the Raw IP and divert tables.

Then we have less contention on the table mutex when IPv4 and IPv6
are used simultaneously.  Also, the individual hash tables get smaller
and looping over all PCBs gets shorter.

ok?

bluhm

Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.420 kern_sysctl.c
--- kern/kern_sysctl.c	1 Oct 2023 15:58:12 -0000	1.420
+++ kern/kern_sysctl.c	7 Jan 2024 23:30:31 -0000
@@ -1493,6 +1493,12 @@ sysctl_file(int *name, u_int namelen, ch
 			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
 				FILLSO(inp->inp_socket);
 			mtx_leave(&udbtable.inpt_mtx);
+#ifdef INET6
+			mtx_enter(&udb6table.inpt_mtx);
+			TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
+				FILLSO(inp->inp_socket);
+			mtx_leave(&udb6table.inpt_mtx);
+#endif
 			mtx_enter(&rawcbtable.inpt_mtx);
 			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
 				FILLSO(inp->inp_socket);
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
diff -u -p -r1.1192 pf.c
--- net/pf.c	1 Jan 2024 22:16:51 -0000	1.1192
+++ net/pf.c	7 Jan 2024 23:30:31 -0000
@@ -3841,6 +3841,8 @@ pf_socket_lookup(struct pf_pdesc *pd)
 		break;
 #ifdef INET6
 	case AF_INET6:
+		if (pd->virtual_proto == IPPROTO_UDP)
+			tb = &udb6table;
 		inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
 		    dport, pd->rdomain);
 		if (inp == NULL) {
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.312 udp_usrreq.c
--- netinet/udp_usrreq.c	1 Dec 2023 15:30:47 -0000	1.312
+++ netinet/udp_usrreq.c	7 Jan 2024 23:30:31 -0000
@@ -161,6 +161,9 @@ const struct sysctl_bounded_args udpctl_
 };
 
 struct	inpcbtable udbtable;
+#ifdef INET6
+struct	inpcbtable udb6table;
+#endif
 struct	cpumem *udpcounters;
 
 void	udp_sbappend(struct inpcb *, struct mbuf *, struct ip *,
@@ -179,6 +182,9 @@ udp_init(void)
 {
 	udpcounters = counters_alloc(udps_ncounters);
 	in_pcbinit(&udbtable, UDB_INITIAL_HASH_SIZE);
+#ifdef INET6
+	in_pcbinit(&udb6table, UDB_INITIAL_HASH_SIZE);
+#endif
 }
 
 int
@@ -375,6 +381,7 @@ udp_input(struct mbuf **mp, int *offp, i
 
 	if (m->m_flags & (M_BCAST|M_MCAST)) {
 		SIMPLEQ_HEAD(, inpcb) inpcblist;
+		struct inpcbtable *tb;
 
 		/*
 		 * Deliver a multicast or broadcast datagram to *all* sockets
@@ -397,17 +404,24 @@ udp_input(struct mbuf **mp, int *offp, i
 		 * (Algorithm copied from raw_intr().)
 		 */
 		SIMPLEQ_INIT(&inpcblist);
-		rw_enter_write(&udbtable.inpt_notify);
-		mtx_enter(&udbtable.inpt_mtx);
-		TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
+#ifdef INET6
+		if (ip6)
+			tb = &udb6table;
+		else
+#endif
+			tb = &udbtable;
+
+		rw_enter_write(&tb->inpt_notify);
+		mtx_enter(&tb->inpt_mtx);
+		TAILQ_FOREACH(inp, &tb->inpt_queue, inp_queue) {
 			if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
 				continue;
 #ifdef INET6
-			/* don't accept it if AF does not match */
-			if (ip6 && !(inp->inp_flags & INP_IPV6))
-				continue;
-			if (!ip6 && (inp->inp_flags & INP_IPV6))
-				continue;
+			/* table is per AF, panic if it does not match */
+			if (ip6)
+				KASSERT(ISSET(inp->inp_flags, INP_IPV6));
+			else
+				KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
 #endif
 			if (rtable_l2(inp->inp_rtableid) !=
 			    rtable_l2(m->m_pkthdr.ph_rtableid))
@@ -467,10 +481,10 @@ udp_input(struct mbuf **mp, int *offp, i
 			    SO_REUSEADDR)) == 0)
 				break;
 		}
-		mtx_leave(&udbtable.inpt_mtx);
+		mtx_leave(&tb->inpt_mtx);
 
 		if (SIMPLEQ_EMPTY(&inpcblist)) {
-			rw_exit_write(&udbtable.inpt_notify);
+			rw_exit_write(&tb->inpt_notify);
 
 			/*
 			 * No matching pcb found; discard datagram.
@@ -495,7 +509,7 @@ udp_input(struct mbuf **mp, int *offp, i
 			}
 			in_pcbunref(inp);
 		}
-		rw_exit_write(&udbtable.inpt_notify);
+		rw_exit_write(&tb->inpt_notify);
 
 		return IPPROTO_DONE;
 	}
@@ -507,25 +521,30 @@ udp_input(struct mbuf **mp, int *offp, i
 #endif
 	if (inp == NULL) {
 #ifdef INET6
-		if (ip6)
-			inp = in6_pcblookup(&udbtable, &ip6->ip6_src,
+		if (ip6) {
+			inp = in6_pcblookup(&udb6table, &ip6->ip6_src,
 			    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 			    m->m_pkthdr.ph_rtableid);
-		else
+		} else
 #endif /* INET6 */
-		inp = in_pcblookup(&udbtable, ip->ip_src, uh->uh_sport,
-		    ip->ip_dst, uh->uh_dport, m->m_pkthdr.ph_rtableid);
+		{
+			inp = in_pcblookup(&udbtable, ip->ip_src,
+			    uh->uh_sport, ip->ip_dst, uh->uh_dport,
+			    m->m_pkthdr.ph_rtableid);
+		}
 	}
 	if (inp == NULL) {
 		udpstat_inc(udps_pcbhashmiss);
 #ifdef INET6
 		if (ip6) {
-			inp = in6_pcblookup_listen(&udbtable, &ip6->ip6_dst,
+			inp = in6_pcblookup_listen(&udb6table, &ip6->ip6_dst,
 			    uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
 		} else
 #endif /* INET6 */
-		inp = in_pcblookup_listen(&udbtable, ip->ip_dst,
-		    uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
+		{
+			inp = in_pcblookup_listen(&udbtable, ip->ip_dst,
+			    uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
+		}
 	}
 
 #ifdef IPSEC
@@ -809,7 +828,7 @@ udp6_ctlinput(int cmd, struct sockaddr *
 			 * corresponding to the address in the ICMPv6 message
 			 * payload.
 			 */
-			inp = in6_pcblookup(&udbtable, &sa6.sin6_addr,
+			inp = in6_pcblookup(&udb6table, &sa6.sin6_addr,
 			    uh.uh_dport, &sa6_src.sin6_addr, uh.uh_sport,
 			    rdomain);
 #if 0
@@ -821,7 +840,7 @@ udp6_ctlinput(int cmd, struct sockaddr *
 			 * is really ours.
 			 */
 			if (inp == NULL) {
-				inp = in6_pcblookup_listen(&udbtable,
+				inp = in6_pcblookup_listen(&udb6table,
 				    &sa6_src.sin6_addr, uh.uh_sport, NULL,
 				    rdomain))
 			}
@@ -847,10 +866,10 @@ udp6_ctlinput(int cmd, struct sockaddr *
 			 */
 		}
 
-		in6_pcbnotify(&udbtable, &sa6, uh.uh_dport,
+		in6_pcbnotify(&udb6table, &sa6, uh.uh_dport,
 		    &sa6_src, uh.uh_sport, rdomain, cmd, cmdarg, notify);
 	} else {
-		in6_pcbnotify(&udbtable, &sa6, 0,
+		in6_pcbnotify(&udb6table, &sa6, 0,
 		    &sa6_any, 0, rdomain, cmd, cmdarg, notify);
 	}
 }
@@ -1079,6 +1098,7 @@ release:
 int
 udp_attach(struct socket *so, int proto, int wait)
 {
+	struct inpcbtable *tb;
 	int error;
 
 	if (so->so_pcb != NULL)
@@ -1088,7 +1108,13 @@ udp_attach(struct socket *so, int proto,
 		return error;
 
 	NET_ASSERT_LOCKED();
-	if ((error = in_pcballoc(so, &udbtable, wait)))
+#ifdef INET6
+	if (so->so_proto->pr_domain->dom_family == PF_INET6)
+		tb = &udb6table;
+	else
+#endif
+		tb = &udbtable;
+	if ((error = in_pcballoc(so, tb, wait)))
 		return error;
 #ifdef INET6
 	if (sotoinpcb(so)->inp_flags & INP_IPV6)
Index: netinet/udp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v
diff -u -p -r1.49 udp_var.h
--- netinet/udp_var.h	17 Oct 2022 14:49:02 -0000	1.49
+++ netinet/udp_var.h	7 Jan 2024 23:30:31 -0000
@@ -123,7 +123,7 @@ udpstat_inc(enum udpstat_counters c)
 	counters_inc(udpcounters, c);
 }
 
-extern struct	inpcbtable udbtable;
+extern struct	inpcbtable udbtable, udb6table;
 extern struct	udpstat udpstat;
 
 extern const struct pr_usrreqs udp_usrreqs;