From: Alexander Bluhm Subject: UDP PCB table split IPv4 and IPv6 To: tech@openbsd.org Date: Mon, 8 Jan 2024 09:59:21 +0100 Hi, Currently we have one UDP table for both IPv4 and IPv6 PCBs. I would like to split it per address family, as is already done for the raw IP and divert tables. Then we have less contention on the table mutex when IPv4 and IPv6 are used simultaneously. Also, the individual hash tables get smaller and looping over all PCBs gets shorter. ok? bluhm Index: kern/kern_sysctl.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.420 kern_sysctl.c --- kern/kern_sysctl.c 1 Oct 2023 15:58:12 -0000 1.420 +++ kern/kern_sysctl.c 7 Jan 2024 23:30:31 -0000 @@ -1493,6 +1493,12 @@ sysctl_file(int *name, u_int namelen, ch TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); mtx_leave(&udbtable.inpt_mtx); +#ifdef INET6 + mtx_enter(&udb6table.inpt_mtx); + TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) + FILLSO(inp->inp_socket); + mtx_leave(&udb6table.inpt_mtx); +#endif mtx_enter(&rawcbtable.inpt_mtx); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v diff -u -p -r1.1192 pf.c --- net/pf.c 1 Jan 2024 22:16:51 -0000 1.1192 +++ net/pf.c 7 Jan 2024 23:30:31 -0000 @@ -3841,6 +3841,8 @@ pf_socket_lookup(struct pf_pdesc *pd) break; #ifdef INET6 case AF_INET6: + if (pd->virtual_proto == IPPROTO_UDP) + tb = &udb6table; inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6, dport, pd->rdomain); if (inp == NULL) { Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.312 udp_usrreq.c --- netinet/udp_usrreq.c 1 Dec 2023 15:30:47 -0000 1.312 +++ netinet/udp_usrreq.c 7 Jan 2024 23:30:31 -0000 @@ -161,6 +161,9 @@ 
const struct sysctl_bounded_args udpctl_ }; struct inpcbtable udbtable; +#ifdef INET6 +struct inpcbtable udb6table; +#endif struct cpumem *udpcounters; void udp_sbappend(struct inpcb *, struct mbuf *, struct ip *, @@ -179,6 +182,9 @@ udp_init(void) { udpcounters = counters_alloc(udps_ncounters); in_pcbinit(&udbtable, UDB_INITIAL_HASH_SIZE); +#ifdef INET6 + in_pcbinit(&udb6table, UDB_INITIAL_HASH_SIZE); +#endif } int @@ -375,6 +381,7 @@ udp_input(struct mbuf **mp, int *offp, i if (m->m_flags & (M_BCAST|M_MCAST)) { SIMPLEQ_HEAD(, inpcb) inpcblist; + struct inpcbtable *tb; /* * Deliver a multicast or broadcast datagram to *all* sockets @@ -397,17 +404,24 @@ udp_input(struct mbuf **mp, int *offp, i * (Algorithm copied from raw_intr().) */ SIMPLEQ_INIT(&inpcblist); - rw_enter_write(&udbtable.inpt_notify); - mtx_enter(&udbtable.inpt_mtx); - TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { +#ifdef INET6 + if (ip6) + tb = &udb6table; + else +#endif + tb = &udbtable; + + rw_enter_write(&tb->inpt_notify); + mtx_enter(&tb->inpt_mtx); + TAILQ_FOREACH(inp, &tb->inpt_queue, inp_queue) { if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE) continue; #ifdef INET6 - /* don't accept it if AF does not match */ - if (ip6 && !(inp->inp_flags & INP_IPV6)) - continue; - if (!ip6 && (inp->inp_flags & INP_IPV6)) - continue; + /* table is per AF, panic if it does not match */ + if (ip6) + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); + else + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); #endif if (rtable_l2(inp->inp_rtableid) != rtable_l2(m->m_pkthdr.ph_rtableid)) @@ -467,10 +481,10 @@ udp_input(struct mbuf **mp, int *offp, i SO_REUSEADDR)) == 0) break; } - mtx_leave(&udbtable.inpt_mtx); + mtx_leave(&tb->inpt_mtx); if (SIMPLEQ_EMPTY(&inpcblist)) { - rw_exit_write(&udbtable.inpt_notify); + rw_exit_write(&tb->inpt_notify); /* * No matching pcb found; discard datagram. 
@@ -495,7 +509,7 @@ udp_input(struct mbuf **mp, int *offp, i } in_pcbunref(inp); } - rw_exit_write(&udbtable.inpt_notify); + rw_exit_write(&tb->inpt_notify); return IPPROTO_DONE; } @@ -507,25 +521,30 @@ udp_input(struct mbuf **mp, int *offp, i #endif if (inp == NULL) { #ifdef INET6 - if (ip6) - inp = in6_pcblookup(&udbtable, &ip6->ip6_src, + if (ip6) { + inp = in6_pcblookup(&udb6table, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, m->m_pkthdr.ph_rtableid); - else + } else #endif /* INET6 */ - inp = in_pcblookup(&udbtable, ip->ip_src, uh->uh_sport, - ip->ip_dst, uh->uh_dport, m->m_pkthdr.ph_rtableid); + { + inp = in_pcblookup(&udbtable, ip->ip_src, + uh->uh_sport, ip->ip_dst, uh->uh_dport, + m->m_pkthdr.ph_rtableid); + } } if (inp == NULL) { udpstat_inc(udps_pcbhashmiss); #ifdef INET6 if (ip6) { - inp = in6_pcblookup_listen(&udbtable, &ip6->ip6_dst, + inp = in6_pcblookup_listen(&udb6table, &ip6->ip6_dst, uh->uh_dport, m, m->m_pkthdr.ph_rtableid); } else #endif /* INET6 */ - inp = in_pcblookup_listen(&udbtable, ip->ip_dst, - uh->uh_dport, m, m->m_pkthdr.ph_rtableid); + { + inp = in_pcblookup_listen(&udbtable, ip->ip_dst, + uh->uh_dport, m, m->m_pkthdr.ph_rtableid); + } } #ifdef IPSEC @@ -809,7 +828,7 @@ udp6_ctlinput(int cmd, struct sockaddr * * corresponding to the address in the ICMPv6 message * payload. */ - inp = in6_pcblookup(&udbtable, &sa6.sin6_addr, + inp = in6_pcblookup(&udb6table, &sa6.sin6_addr, uh.uh_dport, &sa6_src.sin6_addr, uh.uh_sport, rdomain); #if 0 @@ -821,7 +840,7 @@ udp6_ctlinput(int cmd, struct sockaddr * * is really ours. 
*/ if (inp == NULL) { - inp = in6_pcblookup_listen(&udbtable, + inp = in6_pcblookup_listen(&udb6table, &sa6_src.sin6_addr, uh.uh_sport, NULL, rdomain)) } @@ -847,10 +866,10 @@ udp6_ctlinput(int cmd, struct sockaddr * */ } - in6_pcbnotify(&udbtable, &sa6, uh.uh_dport, + in6_pcbnotify(&udb6table, &sa6, uh.uh_dport, &sa6_src, uh.uh_sport, rdomain, cmd, cmdarg, notify); } else { - in6_pcbnotify(&udbtable, &sa6, 0, + in6_pcbnotify(&udb6table, &sa6, 0, &sa6_any, 0, rdomain, cmd, cmdarg, notify); } } @@ -1079,6 +1098,7 @@ release: int udp_attach(struct socket *so, int proto, int wait) { + struct inpcbtable *tb; int error; if (so->so_pcb != NULL) @@ -1088,7 +1108,13 @@ udp_attach(struct socket *so, int proto, return error; NET_ASSERT_LOCKED(); - if ((error = in_pcballoc(so, &udbtable, wait))) +#ifdef INET6 + if (so->so_proto->pr_domain->dom_family == PF_INET6) + tb = &udb6table; + else +#endif + tb = &udbtable; + if ((error = in_pcballoc(so, tb, wait))) return error; #ifdef INET6 if (sotoinpcb(so)->inp_flags & INP_IPV6) Index: netinet/udp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v diff -u -p -r1.49 udp_var.h --- netinet/udp_var.h 17 Oct 2022 14:49:02 -0000 1.49 +++ netinet/udp_var.h 7 Jan 2024 23:30:31 -0000 @@ -123,7 +123,7 @@ udpstat_inc(enum udpstat_counters c) counters_inc(udpcounters, c); } -extern struct inpcbtable udbtable; +extern struct inpcbtable udbtable, udb6table; extern struct udpstat udpstat; extern const struct pr_usrreqs udp_usrreqs;