From: Alexander Bluhm Subject: split TCP inpcb table in IPv4 and IPv6 To: tech@openbsd.org Date: Fri, 12 Apr 2024 15:11:23 +0200 Hi, A while ago I split the UDP inpcb table into v4 and v6 parts. The idea was to reduce contention on the table lock. The same can be done with TCP. Currently TCP runs with the exclusive netlock, so there is not much difference regarding the lock. But with two hash tables, each one gets smaller. Also, we no longer need an if around INP_IPV6, but can instead assert that the flag is correct. ok? bluhm Index: kern/kern_sysctl.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.426 kern_sysctl.c --- kern/kern_sysctl.c 29 Mar 2024 06:50:06 -0000 1.426 +++ kern/kern_sysctl.c 11 Apr 2024 16:39:08 -0000 @@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, ch TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); mtx_leave(&tcbtable.inpt_mtx); +#ifdef INET6 + mtx_enter(&tcb6table.inpt_mtx); + TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) + FILLSO(inp->inp_socket); + mtx_leave(&tcb6table.inpt_mtx); +#endif mtx_enter(&udbtable.inpt_mtx); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v diff -u -p -r1.1193 pf.c --- net/pf.c 10 Jan 2024 16:44:30 -0000 1.1193 +++ net/pf.c 11 Apr 2024 16:39:08 -0000 @@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; - struct inpcbtable *tb; + struct inpcbtable *table; struct inpcb *inp; pd->lookup.uid = -1; @@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd) dport = pd->hdr.tcp.th_dport; PF_ASSERT_LOCKED(); NET_ASSERT_LOCKED(); - tb = &tcbtable; + table = &tcbtable; break; case IPPROTO_UDP: sport = pd->hdr.udp.uh_sport; dport = pd->hdr.udp.uh_dport; PF_ASSERT_LOCKED(); 
NET_ASSERT_LOCKED(); - tb = &udbtable; + table = &udbtable; break; default: return (-1); @@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd) * Fails when rtable is changed while evaluating the ruleset * The socket looked up will not match the one hit in the end. */ - inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport, + inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport, pd->rdomain); if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, + inp = in_pcblookup_listen(table, daddr->v4, dport, NULL, pd->rdomain); if (inp == NULL) return (-1); @@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd) #ifdef INET6 case AF_INET6: if (pd->virtual_proto == IPPROTO_UDP) - tb = &udb6table; - inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6, + table = &udb6table; + if (pd->virtual_proto == IPPROTO_TCP) + table = &tcb6table; + inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6, dport, pd->rdomain); if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, + inp = in6_pcblookup_listen(table, &daddr->v6, dport, NULL, pd->rdomain); if (inp == NULL) return (-1); Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.299 in_pcb.c --- netinet/in_pcb.c 31 Mar 2024 15:53:12 -0000 1.299 +++ netinet/in_pcb.c 11 Apr 2024 16:39:08 -0000 @@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr || rtable_l2(inp->inp_rtableid) != rdomain) { continue; @@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtabl wildcard = 0; #ifdef INET6 if (ISSET(flags, INPLOOKUP_IPV6)) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + 
KASSERT(ISSET(inp->inp_flags, INP_IPV6)); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) wildcard++; @@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtabl } else #endif /* INET6 */ { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif /* INET6 */ + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; @@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp) &inp->inp_faddr6, inp->inp_fport, &inp->inp_laddr6, inp->inp_lport); else -#endif /* INET6 */ +#endif hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid), &inp->inp_faddr, inp->inp_fport, &inp->inp_laddr, inp->inp_lport); @@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *tab head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_fport == fport && inp->inp_lport == lport && inp->inp_faddr.s_addr == faddr->s_addr && inp->inp_laddr.s_addr == laddr->s_addr && Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.402 tcp_input.c --- netinet/tcp_input.c 10 Apr 2024 22:10:03 -0000 1.402 +++ netinet/tcp_input.c 11 Apr 2024 16:44:02 -0000 @@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last; #ifdef INET6 #define ND6_HINT(tp) \ do { \ - if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) && \ + if (tp && tp->t_inpcb && \ + ISSET(tp->t_inpcb->inp_flags, INP_IPV6) && \ rtisvalid(tp->t_inpcb->inp_route.ro_rt)) { \ nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt); \ } \ @@ -540,7 +541,7 @@ findpcb: switch (af) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup(&tcbtable, &ip6->ip6_src, + inp = in6_pcblookup(&tcb6table, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, m->m_pkthdr.ph_rtableid); break; @@ -557,10 +558,10 @@ findpcb: switch 
(af) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst, + inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst, th->th_dport, m, m->m_pkthdr.ph_rtableid); break; -#endif /* INET6 */ +#endif case AF_INET: inp = in_pcblookup_listen(&tcbtable, ip->ip_dst, th->th_dport, m, m->m_pkthdr.ph_rtableid); @@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, stru sizeof(oldinp->inp_seclevel)); #endif /* IPSEC */ #ifdef INET6 - /* - * inp still has the OLD in_pcb stuff, set the - * v6-related flags on the new guy, too. - */ - inp->inp_flags |= (oldinp->inp_flags & INP_IPV6); - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { + KASSERT(ISSET(oldinp->inp_flags, INP_IPV6)); + inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim; inp->inp_hops = oldinp->inp_hops; } else -#endif /* INET6 */ +#endif { + KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6)); + inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl; inp->inp_options = ip_srcroute(m); if (inp->inp_options == NULL) { Index: netinet/tcp_subr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v diff -u -p -r1.199 tcp_subr.c --- netinet/tcp_subr.c 13 Feb 2024 12:22:09 -0000 1.199 +++ netinet/tcp_subr.c 11 Apr 2024 16:39:08 -0000 @@ -159,6 +159,9 @@ tcp_init(void) "sackhl", NULL); pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0); in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE); +#ifdef INET6 + in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE); +#endif tcpcounters = counters_alloc(tcps_ncounters); arc4random_buf(tcp_secret, sizeof(tcp_secret)); @@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait tp->t_pmtud_mss_acked = 0; #ifdef INET6 - /* we disallow IPv4 mapped address completely. 
*/ - if ((inp->inp_flags & INP_IPV6) == 0) - tp->pf = PF_INET; - else + if (ISSET(inp->inp_flags, INP_IPV6)) { tp->pf = PF_INET6; -#else - tp->pf = PF_INET; -#endif - -#ifdef INET6 - if (inp->inp_flags & INP_IPV6) inp->inp_ipv6.ip6_hlim = ip6_defhlim; - else -#endif /* INET6 */ + } else +#endif + { + tp->pf = PF_INET; inp->inp_ip.ip_ttl = ip_defttl; + } inp->inp_ppcb = (caddr_t)tp; return (tp); @@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr * * corresponding to the address in the ICMPv6 message * payload. */ - inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr, + inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr, th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain); if (cmd == PRC_MSGSIZE) { /* @@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr * rdomain); in_pcbunref(inp); } else { - in6_pcbnotify(&tcbtable, sa6, 0, + in6_pcbnotify(&tcb6table, sa6, 0, sa6_src, 0, rdomain, cmd, NULL, notify); } } @@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s void tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain) { - in6_pcbnotify(&tcbtable, sin6, 0, + in6_pcbnotify(&tcb6table, sin6, 0, &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc); } #endif /* INET6 */ Index: netinet/tcp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v diff -u -p -r1.230 tcp_usrreq.c --- netinet/tcp_usrreq.c 11 Feb 2024 01:27:45 -0000 1.230 +++ netinet/tcp_usrreq.c 11 Apr 2024 16:39:08 -0000 @@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_ }; struct inpcbtable tcbtable; +#ifdef INET6 +struct inpcbtable tcb6table; +#endif int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *); int tcp_ident(void *, size_t *, void *, size_t, int); @@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so, if (ISSET(inp->inp_flags, INP_IPV6)) error = ip6_ctloutput(op, so, level, optname, m); else -#endif /* INET6 */ +#endif error = ip_ctloutput(op, so, level, 
optname, m); return (error); } @@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so, int tcp_attach(struct socket *so, int proto, int wait) { + struct inpcbtable *table; struct tcpcb *tp; struct inpcb *inp; int error; @@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto, } NET_ASSERT_LOCKED(); - error = in_pcballoc(so, &tcbtable, wait); +#ifdef INET6 + if (so->so_proto->pr_domain->dom_family == PF_INET6) + table = &tcb6table; + else +#endif + table = &tcbtable; + error = in_pcballoc(so, table, wait); if (error) return (error); inp = sotoinpcb(so); @@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto, } tp->t_state = TCPS_CLOSED; #ifdef INET6 - /* we disallow IPv4 mapped address completely. */ - if (inp->inp_flags & INP_IPV6) + if (ISSET(inp->inp_flags, INP_IPV6)) tp->pf = PF_INET6; else - tp->pf = PF_INET; -#else - tp->pf = PF_INET; #endif + tp->pf = PF_INET; if ((so->so_options & SO_LINGER) && so->so_linger == 0) so->so_linger = TCP_LINGERTIME; @@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mb } #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { struct sockaddr_in6 *sin6; if ((error = in6_nam2sin6(nam, &sin6))) @@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mb goto out; } } else -#endif /* INET6 */ +#endif { struct sockaddr_in *sin; @@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v switch (tir.faddr.ss_family) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup(&tcbtable, &f6, + inp = in6_pcblookup(&tcb6table, &f6, fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain); break; #endif @@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v switch (tir.faddr.ss_family) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup_listen(&tcbtable, + inp = in6_pcblookup_listen(&tcb6table, &l6, lin6->sin6_port, NULL, tir.rdomain); break; #endif Index: netinet/tcp_var.h =================================================================== RCS file: 
/data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.176 tcp_var.h --- netinet/tcp_var.h 13 Feb 2024 12:22:09 -0000 1.176 +++ netinet/tcp_var.h 11 Apr 2024 16:39:08 -0000 @@ -676,7 +676,7 @@ extern const struct pr_usrreqs tcp6_usrr #endif extern struct pool tcpcb_pool; -extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ +extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ extern int tcp_do_rfc1323; /* enabled/disabled? */ extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ extern int tcp_mssdflt; /* default maximum segment size */ Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.318 udp_usrreq.c --- netinet/udp_usrreq.c 11 Feb 2024 18:14:26 -0000 1.318 +++ netinet/udp_usrreq.c 11 Apr 2024 16:39:08 -0000 @@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto, if ((error = in_pcballoc(so, table, wait))) return error; #ifdef INET6 - if (sotoinpcb(so)->inp_flags & INP_IPV6) + if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6)) sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim; else -#endif /* INET6 */ +#endif sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl; return 0; } @@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mb soassertlocked(so); #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) return (EISCONN); } else -#endif /* INET6 */ +#endif { if (inp->inp_faddr.s_addr != INADDR_ANY) return (EISCONN); @@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so) soassertlocked(so); #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) return (ENOTCONN); } else -#endif /* INET6 */ +#endif { if (inp->inp_faddr.s_addr == INADDR_ANY) return (ENOTCONN); @@ -1251,7 +1251,7 @@ udp_send(struct socket 
*so, struct mbuf mtod(addr, struct sockaddr *)); else #ifdef INET6 - if (inp->inp_flags & INP_IPV6) + if (ISSET(inp->inp_flags, INP_IPV6)) session = pipex_l2tp_userland_lookup_session_ipv6( m, inp->inp_faddr6); Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.143 in6_pcb.c --- netinet6/in6_pcb.c 31 Mar 2024 15:53:12 -0000 1.143 +++ netinet6/in6_pcb.c 11 Apr 2024 16:39:08 -0000 @@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table, rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); /* * Under the following condition, notify of redirects @@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *ta head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_fport == fport && inp->inp_lport == lport && IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&