Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: split TCP inpcb table in IPv4 and IPv6
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 12 Apr 2024 17:35:27 +0300

Download raw body.

Thread
On Fri, Apr 12, 2024 at 03:11:23PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> A while ago I split the UDP inpcb table into a v4 and a v6 part.  The
> idea was to reduce contention on the table lock.  The same can be
> done with TCP.
> 
> Currently TCP runs with the exclusive netlock, so there is not much
> difference regarding the lock.  But with two hash tables, each one
> gets smaller.  Also, we no longer need an if around INP_IPV6 to skip
> entries of the other address family; we can instead assert that the
> flag matches the table.
> 
> ok?
> 

ok mvs

> bluhm
> 
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.426 kern_sysctl.c
> --- kern/kern_sysctl.c	29 Mar 2024 06:50:06 -0000	1.426
> +++ kern/kern_sysctl.c	11 Apr 2024 16:39:08 -0000
> @@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, ch
>  			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
>  				FILLSO(inp->inp_socket);
>  			mtx_leave(&tcbtable.inpt_mtx);
> +#ifdef INET6
> +			mtx_enter(&tcb6table.inpt_mtx);
> +			TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
> +				FILLSO(inp->inp_socket);
> +			mtx_leave(&tcb6table.inpt_mtx);
> +#endif
>  			mtx_enter(&udbtable.inpt_mtx);
>  			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
>  				FILLSO(inp->inp_socket);
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1193 pf.c
> --- net/pf.c	10 Jan 2024 16:44:30 -0000	1.1193
> +++ net/pf.c	11 Apr 2024 16:39:08 -0000
> @@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  {
>  	struct pf_addr		*saddr, *daddr;
>  	u_int16_t		 sport, dport;
> -	struct inpcbtable	*tb;
> +	struct inpcbtable	*table;
>  	struct inpcb		*inp;
>  
>  	pd->lookup.uid = -1;
> @@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  		dport = pd->hdr.tcp.th_dport;
>  		PF_ASSERT_LOCKED();
>  		NET_ASSERT_LOCKED();
> -		tb = &tcbtable;
> +		table = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = pd->hdr.udp.uh_dport;
>  		PF_ASSERT_LOCKED();
>  		NET_ASSERT_LOCKED();
> -		tb = &udbtable;
> +		table = &udbtable;
>  		break;
>  	default:
>  		return (-1);
> @@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  		 * Fails when rtable is changed while evaluating the ruleset
>  		 * The socket looked up will not match the one hit in the end.
>  		 */
> -		inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
> +		inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport,
>  		    pd->rdomain);
>  		if (inp == NULL) {
> -			inp = in_pcblookup_listen(tb, daddr->v4, dport,
> +			inp = in_pcblookup_listen(table, daddr->v4, dport,
>  			    NULL, pd->rdomain);
>  			if (inp == NULL)
>  				return (-1);
> @@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  #ifdef INET6
>  	case AF_INET6:
>  		if (pd->virtual_proto == IPPROTO_UDP)
> -			tb = &udb6table;
> -		inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
> +			table = &udb6table;
> +		if (pd->virtual_proto == IPPROTO_TCP)
> +			table = &tcb6table;
> +		inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6,
>  		    dport, pd->rdomain);
>  		if (inp == NULL) {
> -			inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
> +			inp = in6_pcblookup_listen(table, &daddr->v6, dport,
>  			    NULL, pd->rdomain);
>  			if (inp == NULL)
>  				return (-1);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.299 in_pcb.c
> --- netinet/in_pcb.c	31 Mar 2024 15:53:12 -0000	1.299
> +++ netinet/in_pcb.c	11 Apr 2024 16:39:08 -0000
> @@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table
>  	rw_enter_write(&table->inpt_notify);
>  	mtx_enter(&table->inpt_mtx);
>  	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> -#ifdef INET6
> -		if (ISSET(inp->inp_flags, INP_IPV6))
> -			continue;
> -#endif
> +		KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
> +
>  		if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
>  		    rtable_l2(inp->inp_rtableid) != rdomain) {
>  			continue;
> @@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtabl
>  		wildcard = 0;
>  #ifdef INET6
>  		if (ISSET(flags, INPLOOKUP_IPV6)) {
> -			if (!ISSET(inp->inp_flags, INP_IPV6))
> -				continue;
> +			KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>  
>  			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
>  				wildcard++;
> @@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtabl
>  		} else
>  #endif /* INET6 */
>  		{
> -#ifdef INET6
> -			if (ISSET(inp->inp_flags, INP_IPV6))
> -				continue;
> -#endif /* INET6 */
> +			KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
>  
>  			if (inp->inp_faddr.s_addr != INADDR_ANY)
>  				wildcard++;
> @@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp)
>  		    &inp->inp_faddr6, inp->inp_fport,
>  		    &inp->inp_laddr6, inp->inp_lport);
>  	else
> -#endif /* INET6 */
> +#endif
>  		hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid),
>  		    &inp->inp_faddr, inp->inp_fport,
>  		    &inp->inp_laddr, inp->inp_lport);
> @@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *tab
>  
>  	head = &table->inpt_hashtbl[hash & table->inpt_mask];
>  	LIST_FOREACH(inp, head, inp_hash) {
> -#ifdef INET6
> -		if (ISSET(inp->inp_flags, INP_IPV6))
> -			continue;
> -#endif
> +		KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
> +
>  		if (inp->inp_fport == fport && inp->inp_lport == lport &&
>  		    inp->inp_faddr.s_addr == faddr->s_addr &&
>  		    inp->inp_laddr.s_addr == laddr->s_addr &&
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.402 tcp_input.c
> --- netinet/tcp_input.c	10 Apr 2024 22:10:03 -0000	1.402
> +++ netinet/tcp_input.c	11 Apr 2024 16:44:02 -0000
> @@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last;
>  #ifdef INET6
>  #define ND6_HINT(tp) \
>  do { \
> -	if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) &&	\
> +	if (tp && tp->t_inpcb &&					\
> +	    ISSET(tp->t_inpcb->inp_flags, INP_IPV6) &&			\
>  	    rtisvalid(tp->t_inpcb->inp_route.ro_rt)) {			\
>  		nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt);		\
>  	} \
> @@ -540,7 +541,7 @@ findpcb:
>  		switch (af) {
>  #ifdef INET6
>  		case AF_INET6:
> -			inp = in6_pcblookup(&tcbtable, &ip6->ip6_src,
> +			inp = in6_pcblookup(&tcb6table, &ip6->ip6_src,
>  			    th->th_sport, &ip6->ip6_dst, th->th_dport,
>  			    m->m_pkthdr.ph_rtableid);
>  			break;
> @@ -557,10 +558,10 @@ findpcb:
>  		switch (af) {
>  #ifdef INET6
>  		case AF_INET6:
> -			inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst,
> +			inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst,
>  			    th->th_dport, m, m->m_pkthdr.ph_rtableid);
>  			break;
> -#endif /* INET6 */
> +#endif
>  		case AF_INET:
>  			inp = in_pcblookup_listen(&tcbtable, ip->ip_dst,
>  			    th->th_dport, m, m->m_pkthdr.ph_rtableid);
> @@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, stru
>  	    sizeof(oldinp->inp_seclevel));
>  #endif /* IPSEC */
>  #ifdef INET6
> -	/*
> -	 * inp still has the OLD in_pcb stuff, set the
> -	 * v6-related flags on the new guy, too.
> -	 */
> -	inp->inp_flags |= (oldinp->inp_flags & INP_IPV6);
> -	if (inp->inp_flags & INP_IPV6) {
> +	if (ISSET(inp->inp_flags, INP_IPV6)) {
> +		KASSERT(ISSET(oldinp->inp_flags, INP_IPV6));
> +
>  		inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim;
>  		inp->inp_hops = oldinp->inp_hops;
>  	} else
> -#endif /* INET6 */
> +#endif
>  	{
> +		KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6));
> +
>  		inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl;
>  		inp->inp_options = ip_srcroute(m);
>  		if (inp->inp_options == NULL) {
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.199 tcp_subr.c
> --- netinet/tcp_subr.c	13 Feb 2024 12:22:09 -0000	1.199
> +++ netinet/tcp_subr.c	11 Apr 2024 16:39:08 -0000
> @@ -159,6 +159,9 @@ tcp_init(void)
>  	    "sackhl", NULL);
>  	pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
>  	in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE);
> +#ifdef INET6
> +	in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE);
> +#endif
>  	tcpcounters = counters_alloc(tcps_ncounters);
>  
>  	arc4random_buf(tcp_secret, sizeof(tcp_secret));
> @@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
>  	tp->t_pmtud_mss_acked = 0;
>  
>  #ifdef INET6
> -	/* we disallow IPv4 mapped address completely. */
> -	if ((inp->inp_flags & INP_IPV6) == 0)
> -		tp->pf = PF_INET;
> -	else
> +	if (ISSET(inp->inp_flags, INP_IPV6)) {
>  		tp->pf = PF_INET6;
> -#else
> -	tp->pf = PF_INET;
> -#endif
> -
> -#ifdef INET6
> -	if (inp->inp_flags & INP_IPV6)
>  		inp->inp_ipv6.ip6_hlim = ip6_defhlim;
> -	else
> -#endif /* INET6 */
> +	} else
> +#endif
> +	{
> +		tp->pf = PF_INET;
>  		inp->inp_ip.ip_ttl = ip_defttl;
> +	}
>  
>  	inp->inp_ppcb = (caddr_t)tp;
>  	return (tp);
> @@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
>  		 * corresponding to the address in the ICMPv6 message
>  		 * payload.
>  		 */
> -		inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr,
> +		inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr,
>  		    th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain);
>  		if (cmd == PRC_MSGSIZE) {
>  			/*
> @@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
>  			    rdomain);
>  		in_pcbunref(inp);
>  	} else {
> -		in6_pcbnotify(&tcbtable, sa6, 0,
> +		in6_pcbnotify(&tcb6table, sa6, 0,
>  		    sa6_src, 0, rdomain, cmd, NULL, notify);
>  	}
>  }
> @@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
>  void
>  tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain)
>  {
> -	in6_pcbnotify(&tcbtable, sin6, 0,
> +	in6_pcbnotify(&tcb6table, sin6, 0,
>  	    &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc);
>  }
>  #endif /* INET6 */
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.230 tcp_usrreq.c
> --- netinet/tcp_usrreq.c	11 Feb 2024 01:27:45 -0000	1.230
> +++ netinet/tcp_usrreq.c	11 Apr 2024 16:39:08 -0000
> @@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_
>  };
>  
>  struct	inpcbtable tcbtable;
> +#ifdef INET6
> +struct	inpcbtable tcb6table;
> +#endif
>  
>  int	tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
>  int	tcp_ident(void *, size_t *, void *, size_t, int);
> @@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so,
>  		if (ISSET(inp->inp_flags, INP_IPV6))
>  			error = ip6_ctloutput(op, so, level, optname, m);
>  		else
> -#endif /* INET6 */
> +#endif
>  			error = ip_ctloutput(op, so, level, optname, m);
>  		return (error);
>  	}
> @@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so,
>  int
>  tcp_attach(struct socket *so, int proto, int wait)
>  {
> +	struct inpcbtable *table;
>  	struct tcpcb *tp;
>  	struct inpcb *inp;
>  	int error;
> @@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto,
>  	}
>  
>  	NET_ASSERT_LOCKED();
> -	error = in_pcballoc(so, &tcbtable, wait);
> +#ifdef INET6
> +	if (so->so_proto->pr_domain->dom_family == PF_INET6)
> +		table = &tcb6table;
> +	else
> +#endif
> +		table = &tcbtable;
> +	error = in_pcballoc(so, table, wait);
>  	if (error)
>  		return (error);
>  	inp = sotoinpcb(so);
> @@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto,
>  	}
>  	tp->t_state = TCPS_CLOSED;
>  #ifdef INET6
> -	/* we disallow IPv4 mapped address completely. */
> -	if (inp->inp_flags & INP_IPV6)
> +	if (ISSET(inp->inp_flags, INP_IPV6))
>  		tp->pf = PF_INET6;
>  	else
> -		tp->pf = PF_INET;
> -#else
> -	tp->pf = PF_INET;
>  #endif
> +		tp->pf = PF_INET;
>  	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
>  		so->so_linger = TCP_LINGERTIME;
>  
> @@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mb
>  	}
>  
>  #ifdef INET6
> -	if (inp->inp_flags & INP_IPV6) {
> +	if (ISSET(inp->inp_flags, INP_IPV6)) {
>  		struct sockaddr_in6 *sin6;
>  
>  		if ((error = in6_nam2sin6(nam, &sin6)))
> @@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mb
>  			goto out;
>  		}
>  	} else
> -#endif /* INET6 */
> +#endif
>  	{
>  		struct sockaddr_in *sin;
>  
> @@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
>  	switch (tir.faddr.ss_family) {
>  #ifdef INET6
>  	case AF_INET6:
> -		inp = in6_pcblookup(&tcbtable, &f6,
> +		inp = in6_pcblookup(&tcb6table, &f6,
>  		    fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
>  		break;
>  #endif
> @@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
>  		switch (tir.faddr.ss_family) {
>  #ifdef INET6
>  		case AF_INET6:
> -			inp = in6_pcblookup_listen(&tcbtable,
> +			inp = in6_pcblookup_listen(&tcb6table,
>  			    &l6, lin6->sin6_port, NULL, tir.rdomain);
>  			break;
>  #endif
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.176 tcp_var.h
> --- netinet/tcp_var.h	13 Feb 2024 12:22:09 -0000	1.176
> +++ netinet/tcp_var.h	11 Apr 2024 16:39:08 -0000
> @@ -676,7 +676,7 @@ extern	const struct pr_usrreqs tcp6_usrr
>  #endif
>  
>  extern	struct pool tcpcb_pool;
> -extern	struct inpcbtable tcbtable;	/* head of queue of active tcpcb's */
> +extern	struct inpcbtable tcbtable, tcb6table;	/* queue of active tcpcb's */
>  extern	int tcp_do_rfc1323;	/* enabled/disabled? */
>  extern	int tcptv_keep_init;	/* [N] time to keep alive initial SYN packet */
>  extern	int tcp_mssdflt;	/* default maximum segment size */
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.318 udp_usrreq.c
> --- netinet/udp_usrreq.c	11 Feb 2024 18:14:26 -0000	1.318
> +++ netinet/udp_usrreq.c	11 Apr 2024 16:39:08 -0000
> @@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto,
>  	if ((error = in_pcballoc(so, table, wait)))
>  		return error;
>  #ifdef INET6
> -	if (sotoinpcb(so)->inp_flags & INP_IPV6)
> +	if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6))
>  		sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim;
>  	else
> -#endif /* INET6 */
> +#endif
>  		sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl;
>  	return 0;
>  }
> @@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mb
>  	soassertlocked(so);
>  
>  #ifdef INET6
> -	if (inp->inp_flags & INP_IPV6) {
> +	if (ISSET(inp->inp_flags, INP_IPV6)) {
>  		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
>  			return (EISCONN);
>  	} else
> -#endif /* INET6 */
> +#endif
>  	{
>  		if (inp->inp_faddr.s_addr != INADDR_ANY)
>  			return (EISCONN);
> @@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so)
>  	soassertlocked(so);
>  
>  #ifdef INET6
> -	if (inp->inp_flags & INP_IPV6) {
> +	if (ISSET(inp->inp_flags, INP_IPV6)) {
>  		if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
>  			return (ENOTCONN);
>  	} else
> -#endif /* INET6 */
> +#endif
>  	{
>  		if (inp->inp_faddr.s_addr == INADDR_ANY)
>  			return (ENOTCONN);
> @@ -1251,7 +1251,7 @@ udp_send(struct socket *so, struct mbuf 
>  				mtod(addr, struct sockaddr *));
>  		else
>  #ifdef INET6
> -		if (inp->inp_flags & INP_IPV6)
> +		if (ISSET(inp->inp_flags, INP_IPV6))
>  			session =
>  			    pipex_l2tp_userland_lookup_session_ipv6(
>  				m, inp->inp_faddr6);
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.143 in6_pcb.c
> --- netinet6/in6_pcb.c	31 Mar 2024 15:53:12 -0000	1.143
> +++ netinet6/in6_pcb.c	11 Apr 2024 16:39:08 -0000
> @@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table, 
>  	rw_enter_write(&table->inpt_notify);
>  	mtx_enter(&table->inpt_mtx);
>  	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> -		if (!ISSET(inp->inp_flags, INP_IPV6))
> -			continue;
> +		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>  
>  		/*
>  		 * Under the following condition, notify of redirects
> @@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *ta
>  
>  	head = &table->inpt_hashtbl[hash & table->inpt_mask];
>  	LIST_FOREACH(inp, head, inp_hash) {
> -		if (!ISSET(inp->inp_flags, INP_IPV6))
> -			continue;
> +		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
> +
>  		if (inp->inp_fport == fport && inp->inp_lport == lport &&
>  		    IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
>  		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
>