Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
raw IPv6 in parallel
To:
tech@openbsd.org
Date:
Mon, 15 Apr 2024 23:25:34 +0200

Download raw body.

Thread
Hi,

This diff brings rip6_input() in line with shared net lock rip_input().

The fields inp_icmp6filt and inp_cksum6 are protected by exclusive
net lock.  I have a follow up diff to tune them and document this.

IPv4 function rip_disconnect() calls soisdisconnected() and I think
IPv6 should do this, too.

Other BSDs are confusing.  NetBSD rip_disconnect() calls
soisdisconnected(), but rip6_disconnect() only clears SS_ISCONNECTED.
FreeBSD rip_disconnect() clears SS_ISCONNECTED, but rip6_disconnect()
calls soisdisconnected(), so it is the other way around.

Consistent would be to always call soisdisconnected().  The result
is that SS_CANTRCVMORE and SS_CANTSENDMORE are set and subsequent
read and write result in EOF or EPIPE.

4.4BSD allows reconnect for UDP, but not for raw IP.  I think we
should use the same behavior for IPv6.

ok?

bluhm

Index: netinet6/in6_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
diff -u -p -r1.113 in6_proto.c
--- netinet6/in6_proto.c	11 Jan 2024 14:15:12 -0000	1.113
+++ netinet6/in6_proto.c	15 Apr 2024 19:26:36 -0000
@@ -158,7 +158,7 @@ const struct protosw inet6sw[] = {
   .pr_type	= SOCK_RAW,
   .pr_domain	= &inet6domain,
   .pr_protocol	= IPPROTO_RAW,
-  .pr_flags	= PR_ATOMIC|PR_ADDR,
+  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPINPUT,
   .pr_input	= rip6_input,
   .pr_ctlinput	= rip6_ctlinput,
   .pr_ctloutput	= rip6_ctloutput,
@@ -322,7 +322,7 @@ const struct protosw inet6sw[] = {
   /* raw wildcard */
   .pr_type	= SOCK_RAW,
   .pr_domain	= &inet6domain,
-  .pr_flags	= PR_ATOMIC|PR_ADDR,
+  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPINPUT,
   .pr_input	= rip6_input,
   .pr_ctloutput	= rip6_ctloutput,
   .pr_usrreqs	= &rip6_usrreqs,
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.182 raw_ip6.c
--- netinet6/raw_ip6.c	13 Feb 2024 12:22:09 -0000	1.182
+++ netinet6/raw_ip6.c	15 Apr 2024 19:26:36 -0000
@@ -155,9 +155,9 @@ rip6_input(struct mbuf **mp, int *offp, 
 	} else
 		rip6stat_inc(rip6s_ipackets);
 
-	bzero(&rip6src, sizeof(rip6src));
-	rip6src.sin6_len = sizeof(struct sockaddr_in6);
+	memset(&rip6src, 0, sizeof(rip6src));
 	rip6src.sin6_family = AF_INET6;
+	rip6src.sin6_len = sizeof(rip6src);
 	/* KAME hack: recover scopeid */
 	in6_recoverscope(&rip6src, &ip6->ip6_src);
 
@@ -186,7 +186,13 @@ rip6_input(struct mbuf **mp, int *offp, 
 	TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) {
 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
 
-		if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
+		/*
+		 * Packet must not be inserted after disconnected wakeup
+		 * call.  To avoid race, check again when holding receive
+		 * buffer mutex.
+		 */
+		if (ISSET(READ_ONCE(inp->inp_socket->so_rcv.sb_state),
+		    SS_CANTRCVMORE))
 			continue;
 		if (rtable_l2(inp->inp_rtableid) !=
 		    rtable_l2(m->m_pkthdr.ph_rtableid))
@@ -264,7 +270,7 @@ rip6_input(struct mbuf **mp, int *offp, 
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		if (n != NULL) {
 			struct socket *so = inp->inp_socket;
-			int ret;
+			int ret = 0;
 
 			if (inp->inp_flags & IN6P_CONTROLOPTS)
 				ip6_savecontrol(inp, n, &opts);
@@ -272,12 +278,14 @@ rip6_input(struct mbuf **mp, int *offp, 
 			m_adj(n, *offp);
 
 			mtx_enter(&so->so_rcv.sb_mtx);
-			ret = sbappendaddr(so, &so->so_rcv,
-			    sin6tosa(&rip6src), n, opts);
+			if (!ISSET(inp->inp_socket->so_rcv.sb_state,
+			    SS_CANTRCVMORE)) {
+				ret = sbappendaddr(so, &so->so_rcv,
+				    sin6tosa(&rip6src), n, opts);
+			}
 			mtx_leave(&so->so_rcv.sb_mtx);
 
 			if (ret == 0) {
-				/* should notify about lost packet */
 				m_freem(n);
 				m_freem(opts);
 				rip6stat_inc(rip6s_fullsock);
@@ -727,7 +735,7 @@ rip6_disconnect(struct socket *so)
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 
-	so->so_state &= ~SS_ISCONNECTED;	/* XXX */
+	soisdisconnected(so);
 	mtx_enter(&rawin6pcbtable.inpt_mtx);
 	inp->inp_faddr6 = in6addr_any;
 	mtx_leave(&rawin6pcbtable.inpt_mtx);