Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
netstat counter for async socket close
To:
tech@openbsd.org
Date:
Wed, 4 Jun 2025 16:17:39 +0200

Download raw body.

Thread
Hi,

Currently soclose() is protected by the exclusive netlock.  Except for
TCP, that means an inpcb reference always contains a socket pointer
in inp_socket that points back to the inpcb.

When we start unlocking soclose(), this is no longer the case, as
in_pcbdetach() running on another CPU can set so_pcb to NULL.  For
TCP input we take the socket lock for every packet with in_pcbsolock().
If that returns NULL another CPU has called in_pcbdetach() due to
a reset packet or timeout.  Instead of reusing the tcps_noport
counter, I would like to introduce tcps_closing for such packets.

For datagram and raw sockets I want to avoid a per socket lock as
this would kill the packet input performance.  The main problem
here is that KASSERT(sotoinpcb(inp->inp_socket) == inp) could be
false because so_pcb may be NULL.  Only a weak check with READ_ONCE()
is possible without a lock.  I think it is still worthwhile to drop the
packet early and check the KASSERT().  If the socket goes away
later, this is not a problem.  Packets will be purged in sorele()
after inpcb has been released.

This diff adds the closing counter to all protocols.  Also,
KASSERT(pcb == inp) is added to all socket lookups.  The new check is
aware that another CPU can set so_pcb to NULL and returns early then.

ok?

bluhm

Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
diff -u -p -r1.104 ip_divert.c
--- sys/netinet/ip_divert.c	4 Jun 2025 12:37:00 -0000	1.104
+++ sys/netinet/ip_divert.c	4 Jun 2025 13:12:49 -0000
@@ -190,6 +190,7 @@ void
 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
 {
 	struct inpcb *inp = NULL;
+	void *pcb;
 	struct socket *so;
 	struct sockaddr_in sin;
 
@@ -213,6 +214,12 @@ divert_packet(struct mbuf *m, int dir, u
 		divstat_inc(divs_noport);
 		goto bad;
 	}
+	pcb = READ_ONCE(inp->inp_socket->so_pcb);
+	if (pcb == NULL) {
+		divstat_inc(divs_closing);
+		goto bad;
+	}
+	KASSERT(pcb == inp);
 
 	memset(&sin, 0, sizeof(sin));
 	sin.sin_family = AF_INET;
Index: sys/netinet/ip_divert.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.h,v
diff -u -p -r1.27 ip_divert.h
--- sys/netinet/ip_divert.h	4 Jun 2025 12:37:00 -0000	1.27
+++ sys/netinet/ip_divert.h	4 Jun 2025 13:12:49 -0000
@@ -22,6 +22,7 @@
 struct divstat {
 	u_long	divs_ipackets;	/* total input packets */
 	u_long	divs_noport;	/* no socket on port */
+	u_long	divs_closing;	/* inpcb exists, socket is closing */
 	u_long	divs_fullsock;	/* not delivered, input socket full */
 	u_long	divs_opackets;	/* total output packets */
 	u_long	divs_errors;	/* generic errors */
@@ -49,6 +50,7 @@ struct divstat {
 enum divstat_counters {
 	divs_ipackets,
 	divs_noport,
+	divs_closing,
 	divs_fullsock,
 	divs_opackets,
 	divs_errors,
Index: sys/netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
diff -u -p -r1.121 ip_var.h
--- sys/netinet/ip_var.h	2 Mar 2025 21:28:32 -0000	1.121
+++ sys/netinet/ip_var.h	4 Jun 2025 13:12:49 -0000
@@ -68,6 +68,7 @@ struct	ipstat {
 	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
 	u_long	ips_redirectsent;	/* packets forwarded on same net */
 	u_long	ips_noproto;		/* unknown or unsupported protocol */
+	u_long	ips_closing;		/* inpcb exists, socket is closing */
 	u_long	ips_delivered;		/* datagrams delivered to upper level*/
 	u_long	ips_localout;		/* total ip packets generated here */
 	u_long	ips_odropped;		/* lost output due to nobufs, etc. */
@@ -116,6 +117,7 @@ enum ipstat_counters {
 	ips_cantforward,	/* packets rcvd for unreachable dest */
 	ips_redirectsent,	/* packets forwarded on same net */
 	ips_noproto,		/* unknown or unsupported protocol */
+	ips_closing,		/* inpcb exists, socket is closing */
 	ips_delivered,		/* datagrams delivered to upper level*/
 	ips_localout,		/* total ip packets generated here */
 	ips_odropped,		/* lost output packets due to nobufs, etc. */
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.166 raw_ip.c
--- sys/netinet/raw_ip.c	11 Mar 2025 15:31:03 -0000	1.166
+++ sys/netinet/raw_ip.c	4 Jun 2025 13:12:49 -0000
@@ -135,6 +135,7 @@ rip_input(struct mbuf **mp, int *offp, i
 	struct ip *ip = mtod(m, struct ip *);
 	struct inpcb_iterator iter = { .inp_table = NULL };
 	struct inpcb *inp, *last;
+	void *pcb;
 	struct in_addr *key;
 	struct sockaddr_in ripsrc;
 
@@ -169,6 +170,12 @@ rip_input(struct mbuf **mp, int *offp, i
 	while ((inp = in_pcb_iterator(&rawcbtable, inp, &iter)) != NULL) {
 		KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
 
+		pcb = READ_ONCE(inp->inp_socket->so_pcb);
+		if (pcb == NULL) {
+			ipstat_inc(ips_closing);
+			continue;
+		}
+		KASSERT(pcb == inp);
 		/*
 		 * Packet must not be inserted after disconnected wakeup
 		 * call.  To avoid race, check again when holding receive
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.450 tcp_input.c
--- sys/netinet/tcp_input.c	3 Jun 2025 16:51:26 -0000	1.450
+++ sys/netinet/tcp_input.c	4 Jun 2025 13:12:49 -0000
@@ -661,10 +661,9 @@ findpcb:
 		so = in_pcbsolock(inp);
 	}
 	if (so == NULL) {
-		tcpstat_inc(tcps_noport);
+		tcpstat_inc(tcps_closing);
 		goto dropwithreset_ratelim;
 	}
-
 	KASSERT(sotoinpcb(inp->inp_socket) == inp);
 	KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
 	soassertlocked(inp->inp_socket);
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.191 tcp_var.h
--- sys/netinet/tcp_var.h	7 May 2025 14:10:19 -0000	1.191
+++ sys/netinet/tcp_var.h	4 Jun 2025 13:22:54 -0000
@@ -393,6 +393,7 @@ struct	tcpstat {
 
 	u_int32_t tcps_pcbhashmiss;	/* input packets missing pcb hash */
 	u_int32_t tcps_noport;		/* no socket on port */
+	u_int32_t tcps_closing;		/* inpcb exists, socket is closing */
 	u_int32_t tcps_badsyn;		/* SYN packet with src==dst rcv'ed */
 	u_int32_t tcps_dropsyn;		/* SYN packet dropped */
 
@@ -583,6 +584,7 @@ enum tcpstat_counters {
 	tcps_preddat,
 	tcps_pcbhashmiss,
 	tcps_noport,
+	tcps_closing,
 	tcps_badsyn,
 	tcps_dropsyn,
 	tcps_rcvbadsig,
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.341 udp_usrreq.c
--- sys/netinet/udp_usrreq.c	3 Jun 2025 16:51:26 -0000	1.341
+++ sys/netinet/udp_usrreq.c	4 Jun 2025 13:12:49 -0000
@@ -198,6 +198,7 @@ udp_input(struct mbuf **mp, int *offp, i
 	struct ip *ip = NULL;
 	struct udphdr *uh;
 	struct inpcb *inp = NULL;
+	void *pcb;
 	struct ip save_ip;
 	int len;
 	u_int16_t savesum;
@@ -419,6 +420,12 @@ udp_input(struct mbuf **mp, int *offp, i
 			else
 				KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
 
+			pcb = READ_ONCE(inp->inp_socket->so_pcb);
+			if (pcb == NULL) {
+				udpstat_inc(udps_closing);
+				continue;
+			}
+			KASSERT(pcb == inp);
 			if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
 				continue;
 			if (rtable_l2(inp->inp_rtableid) !=
@@ -596,7 +603,12 @@ udp_input(struct mbuf **mp, int *offp, i
 		return IPPROTO_DONE;
 	}
 
-	KASSERT(sotoinpcb(inp->inp_socket) == inp);
+	pcb = READ_ONCE(inp->inp_socket->so_pcb);
+	if (pcb == NULL) {
+		udpstat_inc(udps_closing);
+		goto bad;
+	}
+	KASSERT(pcb == inp);
 	soassertlocked_readonly(inp->inp_socket);
 
 #ifdef INET6
Index: sys/netinet/udp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v
diff -u -p -r1.53 udp_var.h
--- sys/netinet/udp_var.h	2 Mar 2025 21:28:32 -0000	1.53
+++ sys/netinet/udp_var.h	4 Jun 2025 13:12:49 -0000
@@ -61,6 +61,7 @@ struct	udpstat {
 	u_long	udps_badlen;		/* data length larger than packet */
 	u_long	udps_noport;		/* no socket on port */
 	u_long	udps_noportbcast;	/* of above, arrived as broadcast */
+	u_long	udps_closing;		/* inpcb exists, socket is closing */
 	u_long	udps_nosec;		/* dropped for lack of ipsec */
 	u_long	udps_fullsock;		/* not delivered, input socket full */
 	u_long	udps_pcbhashmiss;	/* input packets missing pcb hash */
@@ -104,6 +105,7 @@ enum udpstat_counters {
 	udps_badlen,		/* data length larger than packet */
 	udps_noport,		/* no socket on port */
 	udps_noportbcast,	/* of above, arrived as broadcast */
+	udps_closing,		/* inpcb exists, socket is closing */
 	udps_nosec,		/* dropped for lack of ipsec */
 	udps_fullsock,		/* not delivered, input socket full */
 	udps_pcbhashmiss,	/* input packets missing pcb hash */
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v
diff -u -p -r1.103 ip6_divert.c
--- sys/netinet6/ip6_divert.c	4 Jun 2025 12:37:00 -0000	1.103
+++ sys/netinet6/ip6_divert.c	4 Jun 2025 13:21:35 -0000
@@ -199,6 +199,7 @@ void
 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
 {
 	struct inpcb *inp = NULL;
+	void *pcb;
 	struct socket *so;
 	struct sockaddr_in6 sin6;
 
@@ -222,6 +223,12 @@ divert6_packet(struct mbuf *m, int dir, 
 		div6stat_inc(divs_noport);
 		goto bad;
 	}
+	pcb = READ_ONCE(inp->inp_socket->so_pcb);
+	if (pcb == NULL) {
+		div6stat_inc(divs_closing);
+		goto bad;
+	}
+	KASSERT(pcb == inp);
 
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.192 raw_ip6.c
--- sys/netinet6/raw_ip6.c	27 May 2025 07:52:49 -0000	1.192
+++ sys/netinet6/raw_ip6.c	4 Jun 2025 13:12:49 -0000
@@ -138,6 +138,7 @@ rip6_input(struct mbuf **mp, int *offp, 
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct inpcb_iterator iter = { .inp_table = NULL };
 	struct inpcb *inp, *last;
+	void *pcb;
 	struct in6_addr *key;
 	struct sockaddr_in6 rip6src;
 	uint8_t type;
@@ -184,6 +185,12 @@ rip6_input(struct mbuf **mp, int *offp, 
 	while ((inp = in_pcb_iterator(&rawin6pcbtable, inp, &iter)) != NULL) {
 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
 
+		pcb = READ_ONCE(inp->inp_socket->so_pcb);
+		if (pcb == NULL) {
+			rip6stat_inc(rip6s_closing);
+			continue;
+		}
+		KASSERT(pcb == inp);
 		/*
 		 * Packet must not be inserted after disconnected wakeup
 		 * call.  To avoid race, check again when holding receive
Index: sys/netinet6/raw_ip6.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.h,v
diff -u -p -r1.4 raw_ip6.h
--- sys/netinet6/raw_ip6.h	9 Feb 2017 15:23:35 -0000	1.4
+++ sys/netinet6/raw_ip6.h	4 Jun 2025 13:12:49 -0000
@@ -42,6 +42,7 @@ struct rip6stat {
 	u_int64_t rip6s_badsum;		/* of above, checksum error */
 	u_int64_t rip6s_nosock;		/* no matching socket */
 	u_int64_t rip6s_nosockmcast;	/* of above, arrived as multicast */
+	u_int64_t rip6s_closing;	/* inpcb exists, socket is closing */
 	u_int64_t rip6s_fullsock;	/* not delivered, input socket full */
 
 	u_int64_t rip6s_opackets;	/* total output packets */
@@ -68,6 +69,7 @@ enum rip6stat_counters {
 	rip6s_badsum,
 	rip6s_nosock,
 	rip6s_nosockmcast,
+	rip6s_closing,
 	rip6s_fullsock,
 	rip6s_opackets,
 	rip6s_ncounters,
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
diff -u -p -r1.184 inet.c
--- usr.bin/netstat/inet.c	17 Apr 2025 17:23:17 -0000	1.184
+++ usr.bin/netstat/inet.c	4 Jun 2025 12:41:21 -0000
@@ -468,6 +468,7 @@ tcp_stats(char *name)
 	p(tcps_preddat, "\t%u correct data packet header prediction%s\n");
 	pes(tcps_pcbhashmiss, "\t%u PCB cache miss%s\n");
 	p1(tcps_noport, "\t%u dropped due to no socket\n");
+	p1(tcps_closing, "\t%u dropped as socket is closing\n");
 
 	p(tcps_ecn_accepts, "\t%u ECN connection%s accepted\n");
 	p(tcps_ecn_rcvece, "\t\t%u ECE packet%s received\n");
@@ -556,6 +557,7 @@ udp_stats(char *name)
 	p(udps_outswcsum, "\t%lu output packet%s software-checksummed\n");
 	p1(udps_noport, "\t%lu dropped due to no socket\n");
 	p(udps_noportbcast, "\t%lu broadcast/multicast datagram%s dropped due to no socket\n");
+	p1(udps_closing, "\t%lu dropped as socket is closing\n");
 	p1(udps_nosec, "\t%lu dropped due to missing IPsec protection\n");
 	p1(udps_fullsock, "\t%lu dropped due to full socket buffers\n");
 	delivered = udpstat.udps_ipackets - udpstat.udps_hdrops -
@@ -608,6 +610,7 @@ ip_stats(char *name)
 	p(ips_reassembled, "\t%lu packet%s reassembled ok\n");
 	p(ips_delivered, "\t%lu packet%s for this host\n");
 	p(ips_noproto, "\t%lu packet%s for unknown/unsupported protocol\n");
+	p1(ips_closing, "\t%lu dropped as socket is closing\n");
 	p(ips_forward, "\t%lu packet%s forwarded\n");
 	p(ips_cantforward, "\t%lu packet%s not forwardable\n");
 	p(ips_redirectsent, "\t%lu redirect%s sent\n");
@@ -657,6 +660,7 @@ div_stats(char *name)
 	printf(m, divstat.f)
 	p(divs_ipackets, "\t%lu total packet%s received\n");
 	p1(divs_noport, "\t%lu dropped due to no socket\n");
+	p1(divs_closing, "\t%lu dropped as socket is closing\n");
 	p1(divs_fullsock, "\t%lu dropped due to full socket buffers\n");
 	p(divs_opackets, "\t%lu packet%s output\n");
 	p1(divs_errors, "\t%lu errors\n");
Index: usr.bin/netstat/inet6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet6.c,v
diff -u -p -r1.58 inet6.c
--- usr.bin/netstat/inet6.c	4 Jun 2025 12:37:00 -0000	1.58
+++ usr.bin/netstat/inet6.c	4 Jun 2025 12:41:21 -0000
@@ -847,10 +847,13 @@ rip6_stats(char *name)
 
 #define	p(f, m) if (rip6stat.f || sflag <= 1) \
     printf(m, (unsigned long long)rip6stat.f, plural(rip6stat.f))
+#define	p1(f, m) if (rip6stat.f || sflag <= 1) \
+    printf(m, (unsigned long long)rip6stat.f)
 	p(rip6s_ipackets, "\t%llu message%s received\n");
 	p(rip6s_isum, "\t%llu checksum calculation%s on inbound\n");
 	p(rip6s_badsum, "\t%llu message%s with bad checksum\n");
 	p(rip6s_nosock, "\t%llu message%s dropped due to no socket\n");
+	p1(rip6s_closing, "\t%llu dropped as socket is closing\n");
 	p(rip6s_nosockmcast,
 	    "\t%llu multicast message%s dropped due to no socket\n");
 	p(rip6s_fullsock,
@@ -863,6 +866,7 @@ rip6_stats(char *name)
 		printf("\t%llu delivered\n", (unsigned long long)delivered);
 	p(rip6s_opackets, "\t%llu datagram%s output\n");
 #undef p
+#undef p1
 }
 
 /*
@@ -889,6 +893,7 @@ div6_stats(char *name)
     printf(m, div6stat.f)
 	p(divs_ipackets, "\t%lu total packet%s received\n");
 	p1(divs_noport, "\t%lu dropped due to no socket\n");
+	p1(divs_closing, "\t%lu dropped as socket is closing\n");
 	p1(divs_fullsock, "\t%lu dropped due to full socket buffers\n");
 	p(divs_opackets, "\t%lu packet%s output\n");
 	p1(divs_errors, "\t%lu errors\n");