Download raw body.
split TCP inpcb table into IPv4 and IPv6
Hi,
A while ago I split the UDP inpcb table into v4 and v6 parts. The idea
was to reduce contention on the table lock. The same can be done with TCP.
Currently TCP runs with the exclusive netlock, so there is not much
difference regarding the lock. But with two hash tables, each one
gets smaller. Also, we no longer need an if check around INP_IPV6;
instead we can assert that the flag is correct.
ok?
bluhm
Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.426 kern_sysctl.c
--- kern/kern_sysctl.c 29 Mar 2024 06:50:06 -0000 1.426
+++ kern/kern_sysctl.c 11 Apr 2024 16:39:08 -0000
@@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, ch
TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
FILLSO(inp->inp_socket);
mtx_leave(&tcbtable.inpt_mtx);
+#ifdef INET6
+ mtx_enter(&tcb6table.inpt_mtx);
+ TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
+ FILLSO(inp->inp_socket);
+ mtx_leave(&tcb6table.inpt_mtx);
+#endif
mtx_enter(&udbtable.inpt_mtx);
TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
FILLSO(inp->inp_socket);
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
diff -u -p -r1.1193 pf.c
--- net/pf.c 10 Jan 2024 16:44:30 -0000 1.1193
+++ net/pf.c 11 Apr 2024 16:39:08 -0000
@@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
{
struct pf_addr *saddr, *daddr;
u_int16_t sport, dport;
- struct inpcbtable *tb;
+ struct inpcbtable *table;
struct inpcb *inp;
pd->lookup.uid = -1;
@@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
dport = pd->hdr.tcp.th_dport;
PF_ASSERT_LOCKED();
NET_ASSERT_LOCKED();
- tb = &tcbtable;
+ table = &tcbtable;
break;
case IPPROTO_UDP:
sport = pd->hdr.udp.uh_sport;
dport = pd->hdr.udp.uh_dport;
PF_ASSERT_LOCKED();
NET_ASSERT_LOCKED();
- tb = &udbtable;
+ table = &udbtable;
break;
default:
return (-1);
@@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd)
* Fails when rtable is changed while evaluating the ruleset
* The socket looked up will not match the one hit in the end.
*/
- inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
+ inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport,
pd->rdomain);
if (inp == NULL) {
- inp = in_pcblookup_listen(tb, daddr->v4, dport,
+ inp = in_pcblookup_listen(table, daddr->v4, dport,
NULL, pd->rdomain);
if (inp == NULL)
return (-1);
@@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
#ifdef INET6
case AF_INET6:
if (pd->virtual_proto == IPPROTO_UDP)
- tb = &udb6table;
- inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
+ table = &udb6table;
+ if (pd->virtual_proto == IPPROTO_TCP)
+ table = &tcb6table;
+ inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6,
dport, pd->rdomain);
if (inp == NULL) {
- inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
+ inp = in6_pcblookup_listen(table, &daddr->v6, dport,
NULL, pd->rdomain);
if (inp == NULL)
return (-1);
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.299 in_pcb.c
--- netinet/in_pcb.c 31 Mar 2024 15:53:12 -0000 1.299
+++ netinet/in_pcb.c 11 Apr 2024 16:39:08 -0000
@@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table
rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
-#ifdef INET6
- if (ISSET(inp->inp_flags, INP_IPV6))
- continue;
-#endif
+ KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+
if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
rtable_l2(inp->inp_rtableid) != rdomain) {
continue;
@@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtabl
wildcard = 0;
#ifdef INET6
if (ISSET(flags, INPLOOKUP_IPV6)) {
- if (!ISSET(inp->inp_flags, INP_IPV6))
- continue;
+ KASSERT(ISSET(inp->inp_flags, INP_IPV6));
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
wildcard++;
@@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtabl
} else
#endif /* INET6 */
{
-#ifdef INET6
- if (ISSET(inp->inp_flags, INP_IPV6))
- continue;
-#endif /* INET6 */
+ KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
@@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp)
&inp->inp_faddr6, inp->inp_fport,
&inp->inp_laddr6, inp->inp_lport);
else
-#endif /* INET6 */
+#endif
hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid),
&inp->inp_faddr, inp->inp_fport,
&inp->inp_laddr, inp->inp_lport);
@@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *tab
head = &table->inpt_hashtbl[hash & table->inpt_mask];
LIST_FOREACH(inp, head, inp_hash) {
-#ifdef INET6
- if (ISSET(inp->inp_flags, INP_IPV6))
- continue;
-#endif
+ KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+
if (inp->inp_fport == fport && inp->inp_lport == lport &&
inp->inp_faddr.s_addr == faddr->s_addr &&
inp->inp_laddr.s_addr == laddr->s_addr &&
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.402 tcp_input.c
--- netinet/tcp_input.c 10 Apr 2024 22:10:03 -0000 1.402
+++ netinet/tcp_input.c 11 Apr 2024 16:44:02 -0000
@@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last;
#ifdef INET6
#define ND6_HINT(tp) \
do { \
- if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) && \
+ if (tp && tp->t_inpcb && \
+ ISSET(tp->t_inpcb->inp_flags, INP_IPV6) && \
rtisvalid(tp->t_inpcb->inp_route.ro_rt)) { \
nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt); \
} \
@@ -540,7 +541,7 @@ findpcb:
switch (af) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup(&tcbtable, &ip6->ip6_src,
+ inp = in6_pcblookup(&tcb6table, &ip6->ip6_src,
th->th_sport, &ip6->ip6_dst, th->th_dport,
m->m_pkthdr.ph_rtableid);
break;
@@ -557,10 +558,10 @@ findpcb:
switch (af) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst,
+ inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst,
th->th_dport, m, m->m_pkthdr.ph_rtableid);
break;
-#endif /* INET6 */
+#endif
case AF_INET:
inp = in_pcblookup_listen(&tcbtable, ip->ip_dst,
th->th_dport, m, m->m_pkthdr.ph_rtableid);
@@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, stru
sizeof(oldinp->inp_seclevel));
#endif /* IPSEC */
#ifdef INET6
- /*
- * inp still has the OLD in_pcb stuff, set the
- * v6-related flags on the new guy, too.
- */
- inp->inp_flags |= (oldinp->inp_flags & INP_IPV6);
- if (inp->inp_flags & INP_IPV6) {
+ if (ISSET(inp->inp_flags, INP_IPV6)) {
+ KASSERT(ISSET(oldinp->inp_flags, INP_IPV6));
+
inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim;
inp->inp_hops = oldinp->inp_hops;
} else
-#endif /* INET6 */
+#endif
{
+ KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6));
+
inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl;
inp->inp_options = ip_srcroute(m);
if (inp->inp_options == NULL) {
Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.199 tcp_subr.c
--- netinet/tcp_subr.c 13 Feb 2024 12:22:09 -0000 1.199
+++ netinet/tcp_subr.c 11 Apr 2024 16:39:08 -0000
@@ -159,6 +159,9 @@ tcp_init(void)
"sackhl", NULL);
pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE);
+#ifdef INET6
+ in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE);
+#endif
tcpcounters = counters_alloc(tcps_ncounters);
arc4random_buf(tcp_secret, sizeof(tcp_secret));
@@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
tp->t_pmtud_mss_acked = 0;
#ifdef INET6
- /* we disallow IPv4 mapped address completely. */
- if ((inp->inp_flags & INP_IPV6) == 0)
- tp->pf = PF_INET;
- else
+ if (ISSET(inp->inp_flags, INP_IPV6)) {
tp->pf = PF_INET6;
-#else
- tp->pf = PF_INET;
-#endif
-
-#ifdef INET6
- if (inp->inp_flags & INP_IPV6)
inp->inp_ipv6.ip6_hlim = ip6_defhlim;
- else
-#endif /* INET6 */
+ } else
+#endif
+ {
+ tp->pf = PF_INET;
inp->inp_ip.ip_ttl = ip_defttl;
+ }
inp->inp_ppcb = (caddr_t)tp;
return (tp);
@@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
* corresponding to the address in the ICMPv6 message
* payload.
*/
- inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr,
+ inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr,
th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain);
if (cmd == PRC_MSGSIZE) {
/*
@@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
rdomain);
in_pcbunref(inp);
} else {
- in6_pcbnotify(&tcbtable, sa6, 0,
+ in6_pcbnotify(&tcb6table, sa6, 0,
sa6_src, 0, rdomain, cmd, NULL, notify);
}
}
@@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
void
tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain)
{
- in6_pcbnotify(&tcbtable, sin6, 0,
+ in6_pcbnotify(&tcb6table, sin6, 0,
&sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc);
}
#endif /* INET6 */
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.230 tcp_usrreq.c
--- netinet/tcp_usrreq.c 11 Feb 2024 01:27:45 -0000 1.230
+++ netinet/tcp_usrreq.c 11 Apr 2024 16:39:08 -0000
@@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_
};
struct inpcbtable tcbtable;
+#ifdef INET6
+struct inpcbtable tcb6table;
+#endif
int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
int tcp_ident(void *, size_t *, void *, size_t, int);
@@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so,
if (ISSET(inp->inp_flags, INP_IPV6))
error = ip6_ctloutput(op, so, level, optname, m);
else
-#endif /* INET6 */
+#endif
error = ip_ctloutput(op, so, level, optname, m);
return (error);
}
@@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so,
int
tcp_attach(struct socket *so, int proto, int wait)
{
+ struct inpcbtable *table;
struct tcpcb *tp;
struct inpcb *inp;
int error;
@@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto,
}
NET_ASSERT_LOCKED();
- error = in_pcballoc(so, &tcbtable, wait);
+#ifdef INET6
+ if (so->so_proto->pr_domain->dom_family == PF_INET6)
+ table = &tcb6table;
+ else
+#endif
+ table = &tcbtable;
+ error = in_pcballoc(so, table, wait);
if (error)
return (error);
inp = sotoinpcb(so);
@@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto,
}
tp->t_state = TCPS_CLOSED;
#ifdef INET6
- /* we disallow IPv4 mapped address completely. */
- if (inp->inp_flags & INP_IPV6)
+ if (ISSET(inp->inp_flags, INP_IPV6))
tp->pf = PF_INET6;
else
- tp->pf = PF_INET;
-#else
- tp->pf = PF_INET;
#endif
+ tp->pf = PF_INET;
if ((so->so_options & SO_LINGER) && so->so_linger == 0)
so->so_linger = TCP_LINGERTIME;
@@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mb
}
#ifdef INET6
- if (inp->inp_flags & INP_IPV6) {
+ if (ISSET(inp->inp_flags, INP_IPV6)) {
struct sockaddr_in6 *sin6;
if ((error = in6_nam2sin6(nam, &sin6)))
@@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mb
goto out;
}
} else
-#endif /* INET6 */
+#endif
{
struct sockaddr_in *sin;
@@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
switch (tir.faddr.ss_family) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup(&tcbtable, &f6,
+ inp = in6_pcblookup(&tcb6table, &f6,
fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
break;
#endif
@@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
switch (tir.faddr.ss_family) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup_listen(&tcbtable,
+ inp = in6_pcblookup_listen(&tcb6table,
&l6, lin6->sin6_port, NULL, tir.rdomain);
break;
#endif
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.176 tcp_var.h
--- netinet/tcp_var.h 13 Feb 2024 12:22:09 -0000 1.176
+++ netinet/tcp_var.h 11 Apr 2024 16:39:08 -0000
@@ -676,7 +676,7 @@ extern const struct pr_usrreqs tcp6_usrr
#endif
extern struct pool tcpcb_pool;
-extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */
+extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */
extern int tcp_do_rfc1323; /* enabled/disabled? */
extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */
extern int tcp_mssdflt; /* default maximum segment size */
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.318 udp_usrreq.c
--- netinet/udp_usrreq.c 11 Feb 2024 18:14:26 -0000 1.318
+++ netinet/udp_usrreq.c 11 Apr 2024 16:39:08 -0000
@@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto,
if ((error = in_pcballoc(so, table, wait)))
return error;
#ifdef INET6
- if (sotoinpcb(so)->inp_flags & INP_IPV6)
+ if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6))
sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim;
else
-#endif /* INET6 */
+#endif
sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl;
return 0;
}
@@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mb
soassertlocked(so);
#ifdef INET6
- if (inp->inp_flags & INP_IPV6) {
+ if (ISSET(inp->inp_flags, INP_IPV6)) {
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
return (EISCONN);
} else
-#endif /* INET6 */
+#endif
{
if (inp->inp_faddr.s_addr != INADDR_ANY)
return (EISCONN);
@@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so)
soassertlocked(so);
#ifdef INET6
- if (inp->inp_flags & INP_IPV6) {
+ if (ISSET(inp->inp_flags, INP_IPV6)) {
if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
return (ENOTCONN);
} else
-#endif /* INET6 */
+#endif
{
if (inp->inp_faddr.s_addr == INADDR_ANY)
return (ENOTCONN);
@@ -1251,7 +1251,7 @@ udp_send(struct socket *so, struct mbuf
mtod(addr, struct sockaddr *));
else
#ifdef INET6
- if (inp->inp_flags & INP_IPV6)
+ if (ISSET(inp->inp_flags, INP_IPV6))
session =
pipex_l2tp_userland_lookup_session_ipv6(
m, inp->inp_faddr6);
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.143 in6_pcb.c
--- netinet6/in6_pcb.c 31 Mar 2024 15:53:12 -0000 1.143
+++ netinet6/in6_pcb.c 11 Apr 2024 16:39:08 -0000
@@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table,
rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
- if (!ISSET(inp->inp_flags, INP_IPV6))
- continue;
+ KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*
* Under the following condition, notify of redirects
@@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *ta
head = &table->inpt_hashtbl[hash & table->inpt_mask];
LIST_FOREACH(inp, head, inp_hash) {
- if (!ISSET(inp->inp_flags, INP_IPV6))
- continue;
+ KASSERT(ISSET(inp->inp_flags, INP_IPV6));
+
if (inp->inp_fport == fport && inp->inp_lport == lport &&
IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
split TCP inpcb table into IPv4 and IPv6