Download raw body.
UDP PCB table split IPv4 and IPv6
Hi,
Currently we have one UDP table for both IPv4 and IPv6 PCBs.
I would like to split it, as is already done for the raw IP and divert tables.
Then we have less contention on the table mutex when IPv4 and IPv6
are used simultaneously. Also, the individual hash tables get smaller
and loops over all PCBs get shorter.
ok?
bluhm
Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.420 kern_sysctl.c
--- kern/kern_sysctl.c 1 Oct 2023 15:58:12 -0000 1.420
+++ kern/kern_sysctl.c 7 Jan 2024 23:30:31 -0000
@@ -1493,6 +1493,12 @@ sysctl_file(int *name, u_int namelen, ch
TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
FILLSO(inp->inp_socket);
mtx_leave(&udbtable.inpt_mtx);
+#ifdef INET6
+ mtx_enter(&udb6table.inpt_mtx);
+ TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
+ FILLSO(inp->inp_socket);
+ mtx_leave(&udb6table.inpt_mtx);
+#endif
mtx_enter(&rawcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
FILLSO(inp->inp_socket);
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
diff -u -p -r1.1192 pf.c
--- net/pf.c 1 Jan 2024 22:16:51 -0000 1.1192
+++ net/pf.c 7 Jan 2024 23:30:31 -0000
@@ -3841,6 +3841,8 @@ pf_socket_lookup(struct pf_pdesc *pd)
break;
#ifdef INET6
case AF_INET6:
+ if (pd->virtual_proto == IPPROTO_UDP)
+ tb = &udb6table;
inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
dport, pd->rdomain);
if (inp == NULL) {
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.312 udp_usrreq.c
--- netinet/udp_usrreq.c 1 Dec 2023 15:30:47 -0000 1.312
+++ netinet/udp_usrreq.c 7 Jan 2024 23:30:31 -0000
@@ -161,6 +161,9 @@ const struct sysctl_bounded_args udpctl_
};
struct inpcbtable udbtable;
+#ifdef INET6
+struct inpcbtable udb6table;
+#endif
struct cpumem *udpcounters;
void udp_sbappend(struct inpcb *, struct mbuf *, struct ip *,
@@ -179,6 +182,9 @@ udp_init(void)
{
udpcounters = counters_alloc(udps_ncounters);
in_pcbinit(&udbtable, UDB_INITIAL_HASH_SIZE);
+#ifdef INET6
+ in_pcbinit(&udb6table, UDB_INITIAL_HASH_SIZE);
+#endif
}
int
@@ -375,6 +381,7 @@ udp_input(struct mbuf **mp, int *offp, i
if (m->m_flags & (M_BCAST|M_MCAST)) {
SIMPLEQ_HEAD(, inpcb) inpcblist;
+ struct inpcbtable *tb;
/*
* Deliver a multicast or broadcast datagram to *all* sockets
@@ -397,17 +404,24 @@ udp_input(struct mbuf **mp, int *offp, i
* (Algorithm copied from raw_intr().)
*/
SIMPLEQ_INIT(&inpcblist);
- rw_enter_write(&udbtable.inpt_notify);
- mtx_enter(&udbtable.inpt_mtx);
- TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
+#ifdef INET6
+ if (ip6)
+ tb = &udb6table;
+ else
+#endif
+ tb = &udbtable;
+
+ rw_enter_write(&tb->inpt_notify);
+ mtx_enter(&tb->inpt_mtx);
+ TAILQ_FOREACH(inp, &tb->inpt_queue, inp_queue) {
if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
continue;
#ifdef INET6
- /* don't accept it if AF does not match */
- if (ip6 && !(inp->inp_flags & INP_IPV6))
- continue;
- if (!ip6 && (inp->inp_flags & INP_IPV6))
- continue;
+ /* table is per AF, panic if it does not match */
+ if (ip6)
+ KASSERT(ISSET(inp->inp_flags, INP_IPV6));
+ else
+ KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
#endif
if (rtable_l2(inp->inp_rtableid) !=
rtable_l2(m->m_pkthdr.ph_rtableid))
@@ -467,10 +481,10 @@ udp_input(struct mbuf **mp, int *offp, i
SO_REUSEADDR)) == 0)
break;
}
- mtx_leave(&udbtable.inpt_mtx);
+ mtx_leave(&tb->inpt_mtx);
if (SIMPLEQ_EMPTY(&inpcblist)) {
- rw_exit_write(&udbtable.inpt_notify);
+ rw_exit_write(&tb->inpt_notify);
/*
* No matching pcb found; discard datagram.
@@ -495,7 +509,7 @@ udp_input(struct mbuf **mp, int *offp, i
}
in_pcbunref(inp);
}
- rw_exit_write(&udbtable.inpt_notify);
+ rw_exit_write(&tb->inpt_notify);
return IPPROTO_DONE;
}
@@ -507,25 +521,30 @@ udp_input(struct mbuf **mp, int *offp, i
#endif
if (inp == NULL) {
#ifdef INET6
- if (ip6)
- inp = in6_pcblookup(&udbtable, &ip6->ip6_src,
+ if (ip6) {
+ inp = in6_pcblookup(&udb6table, &ip6->ip6_src,
uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
m->m_pkthdr.ph_rtableid);
- else
+ } else
#endif /* INET6 */
- inp = in_pcblookup(&udbtable, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, m->m_pkthdr.ph_rtableid);
+ {
+ inp = in_pcblookup(&udbtable, ip->ip_src,
+ uh->uh_sport, ip->ip_dst, uh->uh_dport,
+ m->m_pkthdr.ph_rtableid);
+ }
}
if (inp == NULL) {
udpstat_inc(udps_pcbhashmiss);
#ifdef INET6
if (ip6) {
- inp = in6_pcblookup_listen(&udbtable, &ip6->ip6_dst,
+ inp = in6_pcblookup_listen(&udb6table, &ip6->ip6_dst,
uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
} else
#endif /* INET6 */
- inp = in_pcblookup_listen(&udbtable, ip->ip_dst,
- uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
+ {
+ inp = in_pcblookup_listen(&udbtable, ip->ip_dst,
+ uh->uh_dport, m, m->m_pkthdr.ph_rtableid);
+ }
}
#ifdef IPSEC
@@ -809,7 +828,7 @@ udp6_ctlinput(int cmd, struct sockaddr *
* corresponding to the address in the ICMPv6 message
* payload.
*/
- inp = in6_pcblookup(&udbtable, &sa6.sin6_addr,
+ inp = in6_pcblookup(&udb6table, &sa6.sin6_addr,
uh.uh_dport, &sa6_src.sin6_addr, uh.uh_sport,
rdomain);
#if 0
@@ -821,7 +840,7 @@ udp6_ctlinput(int cmd, struct sockaddr *
* is really ours.
*/
if (inp == NULL) {
- inp = in6_pcblookup_listen(&udbtable,
+ inp = in6_pcblookup_listen(&udb6table,
&sa6_src.sin6_addr, uh.uh_sport, NULL,
rdomain))
}
@@ -847,10 +866,10 @@ udp6_ctlinput(int cmd, struct sockaddr *
*/
}
- in6_pcbnotify(&udbtable, &sa6, uh.uh_dport,
+ in6_pcbnotify(&udb6table, &sa6, uh.uh_dport,
&sa6_src, uh.uh_sport, rdomain, cmd, cmdarg, notify);
} else {
- in6_pcbnotify(&udbtable, &sa6, 0,
+ in6_pcbnotify(&udb6table, &sa6, 0,
&sa6_any, 0, rdomain, cmd, cmdarg, notify);
}
}
@@ -1079,6 +1098,7 @@ release:
int
udp_attach(struct socket *so, int proto, int wait)
{
+ struct inpcbtable *tb;
int error;
if (so->so_pcb != NULL)
@@ -1088,7 +1108,13 @@ udp_attach(struct socket *so, int proto,
return error;
NET_ASSERT_LOCKED();
- if ((error = in_pcballoc(so, &udbtable, wait)))
+#ifdef INET6
+ if (so->so_proto->pr_domain->dom_family == PF_INET6)
+ tb = &udb6table;
+ else
+#endif
+ tb = &udbtable;
+ if ((error = in_pcballoc(so, tb, wait)))
return error;
#ifdef INET6
if (sotoinpcb(so)->inp_flags & INP_IPV6)
Index: netinet/udp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v
diff -u -p -r1.49 udp_var.h
--- netinet/udp_var.h 17 Oct 2022 14:49:02 -0000 1.49
+++ netinet/udp_var.h 7 Jan 2024 23:30:31 -0000
@@ -123,7 +123,7 @@ udpstat_inc(enum udpstat_counters c)
counters_inc(udpcounters, c);
}
-extern struct inpcbtable udbtable;
+extern struct inpcbtable udbtable, udb6table;
extern struct udpstat udpstat;
extern const struct pr_usrreqs udp_usrreqs;
UDP PCB table split IPv4 and IPv6