Download raw body.
netstat counter for async socket close
On Wed, Jun 04, 2025 at 04:17:39PM +0200, Alexander Bluhm wrote:
> Hi,
>
> Currently soclose() is protected by exclusive netlock. Except for
> TCP that means an inpcb reference always contains a socket pointer
> in inp_socket that points back to the incpb.
>
> When we start unlocking soclose(), this is no longer the case as
> in_pcbdetach() can set so_pcb to NULL running on another CPU. For
> TCP input we take the socket lock for every packet with in_pcbsolock().
> If that returns NULL another CPU has called in_pcbdetach() due to
> a reset packet or timeout. Instead of reusing the tcps_noport
> counter, I would like to introduce tcps_closing for such packets.
>
> For datagram and raw sockets I want to avoid a per socket lock as
> this would kill the packet input performance. The main problem
> here is that KASSERT(sotoinpcb(inp->inp_socket) == inp) could be
> false as so_pcb is NULL. Only a weak check with READ_ONCE() is
> possible without lock. I think it is still worthy to drop the
> packet early and check the KASSERT(). If the socket goes away
> later, this is not a problem. Packets will be purged in sorele()
> after inpcb has been released.
>
> This diff adds the closing counter to all protocols. Also the
> KASSERT(pcb == inp) is added to all socket lookups. It is aware
> that other CPU can set so_pcb to NULL and then returns early.
mvs@ noticed that counting closing sockets in the broadcast loops
does not make sense.
So here is a new diff, that checks so_pcb is valid, but only counts
it for unicast udp, tcp and divert sockets. Closing raw IP sockets
are just ignored.
Maybe we should not add the (pcb == NULL) check and (pcb == inp)
asserts in the broadcast and multicast loops. Don't know.
ok?
bluhm
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
diff -u -p -r1.105 ip_divert.c
--- sys/netinet/ip_divert.c 6 Jun 2025 13:13:37 -0000 1.105
+++ sys/netinet/ip_divert.c 6 Jun 2025 17:32:31 -0000
@@ -178,6 +178,7 @@ void
divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
{
struct inpcb *inp = NULL;
+ void *pcb;
struct socket *so;
struct sockaddr_in sin;
@@ -201,6 +202,12 @@ divert_packet(struct mbuf *m, int dir, u
divstat_inc(divs_noport);
goto bad;
}
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL) {
+ divstat_inc(divs_closing);
+ goto bad;
+ }
+ KASSERT(pcb == inp);
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
Index: sys/netinet/ip_divert.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.h,v
diff -u -p -r1.28 ip_divert.h
--- sys/netinet/ip_divert.h 6 Jun 2025 13:13:37 -0000 1.28
+++ sys/netinet/ip_divert.h 6 Jun 2025 17:32:31 -0000
@@ -22,6 +22,7 @@
struct divstat {
u_long divs_ipackets; /* total input packets */
u_long divs_noport; /* no socket on port */
+ u_long divs_closing; /* inpcb exists, socket is closing */
u_long divs_fullsock; /* not delivered, input socket full */
u_long divs_opackets; /* total output packets */
u_long divs_errors; /* generic errors */
@@ -53,6 +54,7 @@ struct divstat {
enum divstat_counters {
divs_ipackets,
divs_noport,
+ divs_closing,
divs_fullsock,
divs_opackets,
divs_errors,
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.166 raw_ip.c
--- sys/netinet/raw_ip.c 11 Mar 2025 15:31:03 -0000 1.166
+++ sys/netinet/raw_ip.c 6 Jun 2025 17:32:31 -0000
@@ -135,6 +135,7 @@ rip_input(struct mbuf **mp, int *offp, i
struct ip *ip = mtod(m, struct ip *);
struct inpcb_iterator iter = { .inp_table = NULL };
struct inpcb *inp, *last;
+ void *pcb;
struct in_addr *key;
struct sockaddr_in ripsrc;
@@ -169,6 +170,10 @@ rip_input(struct mbuf **mp, int *offp, i
while ((inp = in_pcb_iterator(&rawcbtable, inp, &iter)) != NULL) {
KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL)
+ continue;
+ KASSERT(pcb == inp);
/*
* Packet must not be inserted after disconnected wakeup
* call. To avoid race, check again when holding receive
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.450 tcp_input.c
--- sys/netinet/tcp_input.c 3 Jun 2025 16:51:26 -0000 1.450
+++ sys/netinet/tcp_input.c 6 Jun 2025 17:32:31 -0000
@@ -661,10 +661,9 @@ findpcb:
so = in_pcbsolock(inp);
}
if (so == NULL) {
- tcpstat_inc(tcps_noport);
+ tcpstat_inc(tcps_closing);
goto dropwithreset_ratelim;
}
-
KASSERT(sotoinpcb(inp->inp_socket) == inp);
KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
soassertlocked(inp->inp_socket);
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.191 tcp_var.h
--- sys/netinet/tcp_var.h 7 May 2025 14:10:19 -0000 1.191
+++ sys/netinet/tcp_var.h 6 Jun 2025 17:32:31 -0000
@@ -393,6 +393,7 @@ struct tcpstat {
u_int32_t tcps_pcbhashmiss; /* input packets missing pcb hash */
u_int32_t tcps_noport; /* no socket on port */
+ u_int32_t tcps_closing; /* inpcb exists, socket is closing */
u_int32_t tcps_badsyn; /* SYN packet with src==dst rcv'ed */
u_int32_t tcps_dropsyn; /* SYN packet dropped */
@@ -583,6 +584,7 @@ enum tcpstat_counters {
tcps_preddat,
tcps_pcbhashmiss,
tcps_noport,
+ tcps_closing,
tcps_badsyn,
tcps_dropsyn,
tcps_rcvbadsig,
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.341 udp_usrreq.c
--- sys/netinet/udp_usrreq.c 3 Jun 2025 16:51:26 -0000 1.341
+++ sys/netinet/udp_usrreq.c 6 Jun 2025 17:32:31 -0000
@@ -198,6 +198,7 @@ udp_input(struct mbuf **mp, int *offp, i
struct ip *ip = NULL;
struct udphdr *uh;
struct inpcb *inp = NULL;
+ void *pcb;
struct ip save_ip;
int len;
u_int16_t savesum;
@@ -419,6 +420,10 @@ udp_input(struct mbuf **mp, int *offp, i
else
KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL)
+ continue;
+ KASSERT(pcb == inp);
if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
continue;
if (rtable_l2(inp->inp_rtableid) !=
@@ -596,7 +601,12 @@ udp_input(struct mbuf **mp, int *offp, i
return IPPROTO_DONE;
}
- KASSERT(sotoinpcb(inp->inp_socket) == inp);
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL) {
+ udpstat_inc(udps_closing);
+ goto bad;
+ }
+ KASSERT(pcb == inp);
soassertlocked_readonly(inp->inp_socket);
#ifdef INET6
Index: sys/netinet/udp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v
diff -u -p -r1.53 udp_var.h
--- sys/netinet/udp_var.h 2 Mar 2025 21:28:32 -0000 1.53
+++ sys/netinet/udp_var.h 6 Jun 2025 17:32:31 -0000
@@ -61,6 +61,7 @@ struct udpstat {
u_long udps_badlen; /* data length larger than packet */
u_long udps_noport; /* no socket on port */
u_long udps_noportbcast; /* of above, arrived as broadcast */
+ u_long udps_closing; /* inpcb exists, socket is closing */
u_long udps_nosec; /* dropped for lack of ipsec */
u_long udps_fullsock; /* not delivered, input socket full */
u_long udps_pcbhashmiss; /* input packets missing pcb hash */
@@ -104,6 +105,7 @@ enum udpstat_counters {
udps_badlen, /* data length larger than packet */
udps_noport, /* no socket on port */
udps_noportbcast, /* of above, arrived as broadcast */
+ udps_closing, /* inpcb exists, socket is closing */
udps_nosec, /* dropped for lack of ipsec */
udps_fullsock, /* not delivered, input socket full */
udps_pcbhashmiss, /* input packets missing pcb hash */
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v
diff -u -p -r1.104 ip6_divert.c
--- sys/netinet6/ip6_divert.c 6 Jun 2025 13:13:37 -0000 1.104
+++ sys/netinet6/ip6_divert.c 6 Jun 2025 17:32:31 -0000
@@ -187,6 +187,7 @@ void
divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
{
struct inpcb *inp = NULL;
+ void *pcb;
struct socket *so;
struct sockaddr_in6 sin6;
@@ -210,6 +211,12 @@ divert6_packet(struct mbuf *m, int dir,
div6stat_inc(divs_noport);
goto bad;
}
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL) {
+ div6stat_inc(divs_closing);
+ goto bad;
+ }
+ KASSERT(pcb == inp);
memset(&sin6, 0, sizeof(sin6));
sin6.sin6_family = AF_INET6;
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.192 raw_ip6.c
--- sys/netinet6/raw_ip6.c 27 May 2025 07:52:49 -0000 1.192
+++ sys/netinet6/raw_ip6.c 6 Jun 2025 17:32:31 -0000
@@ -138,6 +138,7 @@ rip6_input(struct mbuf **mp, int *offp,
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct inpcb_iterator iter = { .inp_table = NULL };
struct inpcb *inp, *last;
+ void *pcb;
struct in6_addr *key;
struct sockaddr_in6 rip6src;
uint8_t type;
@@ -184,6 +185,10 @@ rip6_input(struct mbuf **mp, int *offp,
while ((inp = in_pcb_iterator(&rawin6pcbtable, inp, &iter)) != NULL) {
KASSERT(ISSET(inp->inp_flags, INP_IPV6));
+ pcb = READ_ONCE(inp->inp_socket->so_pcb);
+ if (pcb == NULL)
+ continue;
+ KASSERT(pcb == inp);
/*
* Packet must not be inserted after disconnected wakeup
* call. To avoid race, check again when holding receive
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
diff -u -p -r1.184 inet.c
--- usr.bin/netstat/inet.c 17 Apr 2025 17:23:17 -0000 1.184
+++ usr.bin/netstat/inet.c 5 Jun 2025 13:03:15 -0000
@@ -468,6 +468,7 @@ tcp_stats(char *name)
p(tcps_preddat, "\t%u correct data packet header prediction%s\n");
pes(tcps_pcbhashmiss, "\t%u PCB cache miss%s\n");
p1(tcps_noport, "\t%u dropped due to no socket\n");
+ p1(tcps_closing, "\t%u dropped as socket is closing\n");
p(tcps_ecn_accepts, "\t%u ECN connection%s accepted\n");
p(tcps_ecn_rcvece, "\t\t%u ECE packet%s received\n");
@@ -556,6 +557,7 @@ udp_stats(char *name)
p(udps_outswcsum, "\t%lu output packet%s software-checksummed\n");
p1(udps_noport, "\t%lu dropped due to no socket\n");
p(udps_noportbcast, "\t%lu broadcast/multicast datagram%s dropped due to no socket\n");
+ p1(udps_closing, "\t%lu dropped as socket is closing\n");
p1(udps_nosec, "\t%lu dropped due to missing IPsec protection\n");
p1(udps_fullsock, "\t%lu dropped due to full socket buffers\n");
delivered = udpstat.udps_ipackets - udpstat.udps_hdrops -
@@ -657,6 +659,7 @@ div_stats(char *name)
printf(m, divstat.f)
p(divs_ipackets, "\t%lu total packet%s received\n");
p1(divs_noport, "\t%lu dropped due to no socket\n");
+ p1(divs_closing, "\t%lu dropped as socket is closing\n");
p1(divs_fullsock, "\t%lu dropped due to full socket buffers\n");
p(divs_opackets, "\t%lu packet%s output\n");
p1(divs_errors, "\t%lu errors\n");
Index: usr.bin/netstat/inet6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet6.c,v
diff -u -p -r1.58 inet6.c
--- usr.bin/netstat/inet6.c 4 Jun 2025 12:37:00 -0000 1.58
+++ usr.bin/netstat/inet6.c 5 Jun 2025 13:03:23 -0000
@@ -863,6 +863,7 @@ rip6_stats(char *name)
printf("\t%llu delivered\n", (unsigned long long)delivered);
p(rip6s_opackets, "\t%llu datagram%s output\n");
#undef p
+#undef p1
}
/*
@@ -889,6 +890,7 @@ div6_stats(char *name)
printf(m, div6stat.f)
p(divs_ipackets, "\t%lu total packet%s received\n");
p1(divs_noport, "\t%lu dropped due to no socket\n");
+ p1(divs_closing, "\t%lu dropped as socket is closing\n");
p1(divs_fullsock, "\t%lu dropped due to full socket buffers\n");
p(divs_opackets, "\t%lu packet%s output\n");
p1(divs_errors, "\t%lu errors\n");
netstat counter for async socket close