Download raw body.
split TCP inpcb table in IPv4 and IPv6
On Fri, Apr 12, 2024 at 03:11:23PM +0200, Alexander Bluhm wrote:
> Hi,
>
> A while ago I split the UDP inpcb table into a v4 and a v6 part. The idea
> was to reduce contention on the table lock. The same can be done with TCP.
>
> Currently TCP runs with exclusive netlock, so there is not much
> difference regarding the lock. But with two hash tables each one
> gets smaller. Also we don't need an if around INP_IPV6, but can
> assert that it is correct.
>
> ok?
>
ok mvs
> bluhm
>
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.426 kern_sysctl.c
> --- kern/kern_sysctl.c 29 Mar 2024 06:50:06 -0000 1.426
> +++ kern/kern_sysctl.c 11 Apr 2024 16:39:08 -0000
> @@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, ch
> TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
> FILLSO(inp->inp_socket);
> mtx_leave(&tcbtable.inpt_mtx);
> +#ifdef INET6
> + mtx_enter(&tcb6table.inpt_mtx);
> + TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
> + FILLSO(inp->inp_socket);
> + mtx_leave(&tcb6table.inpt_mtx);
> +#endif
> mtx_enter(&udbtable.inpt_mtx);
> TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
> FILLSO(inp->inp_socket);
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1193 pf.c
> --- net/pf.c 10 Jan 2024 16:44:30 -0000 1.1193
> +++ net/pf.c 11 Apr 2024 16:39:08 -0000
> @@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
> {
> struct pf_addr *saddr, *daddr;
> u_int16_t sport, dport;
> - struct inpcbtable *tb;
> + struct inpcbtable *table;
> struct inpcb *inp;
>
> pd->lookup.uid = -1;
> @@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
> dport = pd->hdr.tcp.th_dport;
> PF_ASSERT_LOCKED();
> NET_ASSERT_LOCKED();
> - tb = &tcbtable;
> + table = &tcbtable;
> break;
> case IPPROTO_UDP:
> sport = pd->hdr.udp.uh_sport;
> dport = pd->hdr.udp.uh_dport;
> PF_ASSERT_LOCKED();
> NET_ASSERT_LOCKED();
> - tb = &udbtable;
> + table = &udbtable;
> break;
> default:
> return (-1);
> @@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd)
> * Fails when rtable is changed while evaluating the ruleset
> * The socket looked up will not match the one hit in the end.
> */
> - inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
> + inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport,
> pd->rdomain);
> if (inp == NULL) {
> - inp = in_pcblookup_listen(tb, daddr->v4, dport,
> + inp = in_pcblookup_listen(table, daddr->v4, dport,
> NULL, pd->rdomain);
> if (inp == NULL)
> return (-1);
> @@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
> #ifdef INET6
> case AF_INET6:
> if (pd->virtual_proto == IPPROTO_UDP)
> - tb = &udb6table;
> - inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
> + table = &udb6table;
> + if (pd->virtual_proto == IPPROTO_TCP)
> + table = &tcb6table;
> + inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6,
> dport, pd->rdomain);
> if (inp == NULL) {
> - inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
> + inp = in6_pcblookup_listen(table, &daddr->v6, dport,
> NULL, pd->rdomain);
> if (inp == NULL)
> return (-1);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.299 in_pcb.c
> --- netinet/in_pcb.c 31 Mar 2024 15:53:12 -0000 1.299
> +++ netinet/in_pcb.c 11 Apr 2024 16:39:08 -0000
> @@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table
> rw_enter_write(&table->inpt_notify);
> mtx_enter(&table->inpt_mtx);
> TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> -#ifdef INET6
> - if (ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> -#endif
> + KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
> +
> if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
> rtable_l2(inp->inp_rtableid) != rdomain) {
> continue;
> @@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtabl
> wildcard = 0;
> #ifdef INET6
> if (ISSET(flags, INPLOOKUP_IPV6)) {
> - if (!ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> + KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>
> if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
> wildcard++;
> @@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtabl
> } else
> #endif /* INET6 */
> {
> -#ifdef INET6
> - if (ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> -#endif /* INET6 */
> + KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
>
> if (inp->inp_faddr.s_addr != INADDR_ANY)
> wildcard++;
> @@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp)
> &inp->inp_faddr6, inp->inp_fport,
> &inp->inp_laddr6, inp->inp_lport);
> else
> -#endif /* INET6 */
> +#endif
> hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid),
> &inp->inp_faddr, inp->inp_fport,
> &inp->inp_laddr, inp->inp_lport);
> @@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *tab
>
> head = &table->inpt_hashtbl[hash & table->inpt_mask];
> LIST_FOREACH(inp, head, inp_hash) {
> -#ifdef INET6
> - if (ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> -#endif
> + KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
> +
> if (inp->inp_fport == fport && inp->inp_lport == lport &&
> inp->inp_faddr.s_addr == faddr->s_addr &&
> inp->inp_laddr.s_addr == laddr->s_addr &&
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.402 tcp_input.c
> --- netinet/tcp_input.c 10 Apr 2024 22:10:03 -0000 1.402
> +++ netinet/tcp_input.c 11 Apr 2024 16:44:02 -0000
> @@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last;
> #ifdef INET6
> #define ND6_HINT(tp) \
> do { \
> - if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) && \
> + if (tp && tp->t_inpcb && \
> + ISSET(tp->t_inpcb->inp_flags, INP_IPV6) && \
> rtisvalid(tp->t_inpcb->inp_route.ro_rt)) { \
> nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt); \
> } \
> @@ -540,7 +541,7 @@ findpcb:
> switch (af) {
> #ifdef INET6
> case AF_INET6:
> - inp = in6_pcblookup(&tcbtable, &ip6->ip6_src,
> + inp = in6_pcblookup(&tcb6table, &ip6->ip6_src,
> th->th_sport, &ip6->ip6_dst, th->th_dport,
> m->m_pkthdr.ph_rtableid);
> break;
> @@ -557,10 +558,10 @@ findpcb:
> switch (af) {
> #ifdef INET6
> case AF_INET6:
> - inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst,
> + inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst,
> th->th_dport, m, m->m_pkthdr.ph_rtableid);
> break;
> -#endif /* INET6 */
> +#endif
> case AF_INET:
> inp = in_pcblookup_listen(&tcbtable, ip->ip_dst,
> th->th_dport, m, m->m_pkthdr.ph_rtableid);
> @@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, stru
> sizeof(oldinp->inp_seclevel));
> #endif /* IPSEC */
> #ifdef INET6
> - /*
> - * inp still has the OLD in_pcb stuff, set the
> - * v6-related flags on the new guy, too.
> - */
> - inp->inp_flags |= (oldinp->inp_flags & INP_IPV6);
> - if (inp->inp_flags & INP_IPV6) {
> + if (ISSET(inp->inp_flags, INP_IPV6)) {
> + KASSERT(ISSET(oldinp->inp_flags, INP_IPV6));
> +
> inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim;
> inp->inp_hops = oldinp->inp_hops;
> } else
> -#endif /* INET6 */
> +#endif
> {
> + KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6));
> +
> inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl;
> inp->inp_options = ip_srcroute(m);
> if (inp->inp_options == NULL) {
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.199 tcp_subr.c
> --- netinet/tcp_subr.c 13 Feb 2024 12:22:09 -0000 1.199
> +++ netinet/tcp_subr.c 11 Apr 2024 16:39:08 -0000
> @@ -159,6 +159,9 @@ tcp_init(void)
> "sackhl", NULL);
> pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
> in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE);
> +#ifdef INET6
> + in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE);
> +#endif
> tcpcounters = counters_alloc(tcps_ncounters);
>
> arc4random_buf(tcp_secret, sizeof(tcp_secret));
> @@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
> tp->t_pmtud_mss_acked = 0;
>
> #ifdef INET6
> - /* we disallow IPv4 mapped address completely. */
> - if ((inp->inp_flags & INP_IPV6) == 0)
> - tp->pf = PF_INET;
> - else
> + if (ISSET(inp->inp_flags, INP_IPV6)) {
> tp->pf = PF_INET6;
> -#else
> - tp->pf = PF_INET;
> -#endif
> -
> -#ifdef INET6
> - if (inp->inp_flags & INP_IPV6)
> inp->inp_ipv6.ip6_hlim = ip6_defhlim;
> - else
> -#endif /* INET6 */
> + } else
> +#endif
> + {
> + tp->pf = PF_INET;
> inp->inp_ip.ip_ttl = ip_defttl;
> + }
>
> inp->inp_ppcb = (caddr_t)tp;
> return (tp);
> @@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
> * corresponding to the address in the ICMPv6 message
> * payload.
> */
> - inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr,
> + inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr,
> th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain);
> if (cmd == PRC_MSGSIZE) {
> /*
> @@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
> rdomain);
> in_pcbunref(inp);
> } else {
> - in6_pcbnotify(&tcbtable, sa6, 0,
> + in6_pcbnotify(&tcb6table, sa6, 0,
> sa6_src, 0, rdomain, cmd, NULL, notify);
> }
> }
> @@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
> void
> tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain)
> {
> - in6_pcbnotify(&tcbtable, sin6, 0,
> + in6_pcbnotify(&tcb6table, sin6, 0,
> &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc);
> }
> #endif /* INET6 */
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.230 tcp_usrreq.c
> --- netinet/tcp_usrreq.c 11 Feb 2024 01:27:45 -0000 1.230
> +++ netinet/tcp_usrreq.c 11 Apr 2024 16:39:08 -0000
> @@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_
> };
>
> struct inpcbtable tcbtable;
> +#ifdef INET6
> +struct inpcbtable tcb6table;
> +#endif
>
> int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
> int tcp_ident(void *, size_t *, void *, size_t, int);
> @@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so,
> if (ISSET(inp->inp_flags, INP_IPV6))
> error = ip6_ctloutput(op, so, level, optname, m);
> else
> -#endif /* INET6 */
> +#endif
> error = ip_ctloutput(op, so, level, optname, m);
> return (error);
> }
> @@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so,
> int
> tcp_attach(struct socket *so, int proto, int wait)
> {
> + struct inpcbtable *table;
> struct tcpcb *tp;
> struct inpcb *inp;
> int error;
> @@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto,
> }
>
> NET_ASSERT_LOCKED();
> - error = in_pcballoc(so, &tcbtable, wait);
> +#ifdef INET6
> + if (so->so_proto->pr_domain->dom_family == PF_INET6)
> + table = &tcb6table;
> + else
> +#endif
> + table = &tcbtable;
> + error = in_pcballoc(so, table, wait);
> if (error)
> return (error);
> inp = sotoinpcb(so);
> @@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto,
> }
> tp->t_state = TCPS_CLOSED;
> #ifdef INET6
> - /* we disallow IPv4 mapped address completely. */
> - if (inp->inp_flags & INP_IPV6)
> + if (ISSET(inp->inp_flags, INP_IPV6))
> tp->pf = PF_INET6;
> else
> - tp->pf = PF_INET;
> -#else
> - tp->pf = PF_INET;
> #endif
> + tp->pf = PF_INET;
> if ((so->so_options & SO_LINGER) && so->so_linger == 0)
> so->so_linger = TCP_LINGERTIME;
>
> @@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mb
> }
>
> #ifdef INET6
> - if (inp->inp_flags & INP_IPV6) {
> + if (ISSET(inp->inp_flags, INP_IPV6)) {
> struct sockaddr_in6 *sin6;
>
> if ((error = in6_nam2sin6(nam, &sin6)))
> @@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mb
> goto out;
> }
> } else
> -#endif /* INET6 */
> +#endif
> {
> struct sockaddr_in *sin;
>
> @@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
> switch (tir.faddr.ss_family) {
> #ifdef INET6
> case AF_INET6:
> - inp = in6_pcblookup(&tcbtable, &f6,
> + inp = in6_pcblookup(&tcb6table, &f6,
> fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
> break;
> #endif
> @@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
> switch (tir.faddr.ss_family) {
> #ifdef INET6
> case AF_INET6:
> - inp = in6_pcblookup_listen(&tcbtable,
> + inp = in6_pcblookup_listen(&tcb6table,
> &l6, lin6->sin6_port, NULL, tir.rdomain);
> break;
> #endif
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.176 tcp_var.h
> --- netinet/tcp_var.h 13 Feb 2024 12:22:09 -0000 1.176
> +++ netinet/tcp_var.h 11 Apr 2024 16:39:08 -0000
> @@ -676,7 +676,7 @@ extern const struct pr_usrreqs tcp6_usrr
> #endif
>
> extern struct pool tcpcb_pool;
> -extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */
> +extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */
> extern int tcp_do_rfc1323; /* enabled/disabled? */
> extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */
> extern int tcp_mssdflt; /* default maximum segment size */
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.318 udp_usrreq.c
> --- netinet/udp_usrreq.c 11 Feb 2024 18:14:26 -0000 1.318
> +++ netinet/udp_usrreq.c 11 Apr 2024 16:39:08 -0000
> @@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto,
> if ((error = in_pcballoc(so, table, wait)))
> return error;
> #ifdef INET6
> - if (sotoinpcb(so)->inp_flags & INP_IPV6)
> + if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6))
> sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim;
> else
> -#endif /* INET6 */
> +#endif
> sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl;
> return 0;
> }
> @@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mb
> soassertlocked(so);
>
> #ifdef INET6
> - if (inp->inp_flags & INP_IPV6) {
> + if (ISSET(inp->inp_flags, INP_IPV6)) {
> if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
> return (EISCONN);
> } else
> -#endif /* INET6 */
> +#endif
> {
> if (inp->inp_faddr.s_addr != INADDR_ANY)
> return (EISCONN);
> @@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so)
> soassertlocked(so);
>
> #ifdef INET6
> - if (inp->inp_flags & INP_IPV6) {
> + if (ISSET(inp->inp_flags, INP_IPV6)) {
> if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
> return (ENOTCONN);
> } else
> -#endif /* INET6 */
> +#endif
> {
> if (inp->inp_faddr.s_addr == INADDR_ANY)
> return (ENOTCONN);
> @@ -1251,7 +1251,7 @@ udp_send(struct socket *so, struct mbuf
> mtod(addr, struct sockaddr *));
> else
> #ifdef INET6
> - if (inp->inp_flags & INP_IPV6)
> + if (ISSET(inp->inp_flags, INP_IPV6))
> session =
> pipex_l2tp_userland_lookup_session_ipv6(
> m, inp->inp_faddr6);
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.143 in6_pcb.c
> --- netinet6/in6_pcb.c 31 Mar 2024 15:53:12 -0000 1.143
> +++ netinet6/in6_pcb.c 11 Apr 2024 16:39:08 -0000
> @@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table,
> rw_enter_write(&table->inpt_notify);
> mtx_enter(&table->inpt_mtx);
> TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> - if (!ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> + KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>
> /*
> * Under the following condition, notify of redirects
> @@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *ta
>
> head = &table->inpt_hashtbl[hash & table->inpt_mask];
> LIST_FOREACH(inp, head, inp_hash) {
> - if (!ISSET(inp->inp_flags, INP_IPV6))
> - continue;
> + KASSERT(ISSET(inp->inp_flags, INP_IPV6));
> +
> if (inp->inp_fport == fport && inp->inp_lport == lport &&
> IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
> IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
>
split TCP inpcb table in IPv4 and IPv6