Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: raw IPv6 in parallel
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Tue, 16 Apr 2024 01:03:37 +0300

Download raw body.

Thread
> On 16 Apr 2024, at 00:25, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> Hi,
> 
> This diff brings rip6_input() in line with shared net lock rip6_input().
> 
> The fields inp_icmp6filt and inp_cksum6 are protected by exclusive
> net lock.  I have a follow up diff to tune them and document this.
> 
> IPv4 function rip_disconnect() calls soisdisconnected() and I think
> IPv6 should do this, too.
> 
> Other BSDs are confusing.  NetBSD rip_disconnect() calls
> soisdisconnected(), but rip6_disconnect() only clears SS_ISCONNECTED.
> FreeBSD rip_disconnect() clears SS_ISCONNECTED, but rip6_disconnect()
> calls soisdisconnected(), so it is the other way around.
> 
> Consistent would be to always call soisdisconnected().  The reuslt
> is that SS_CANTRCVMORE and SS_CANTSENDMORE are set and subsequent
> read and write result in EOF or EPIPE.
> 
> 4.4BSD allows reconnect for UDP, but not for raw IP.  I think we
> should use the same behavior for IPv6.
> 
> ok?
> 

ok mvs

soisdisconnected() awakes sleeping threads polling threads. This is
better, because otherwise userland program continues to sleep and
SS_ flag modification has no sense.

unix(4) sockets have the same problem in unp_detach(). We clean
SS_ISCONNECTED and set so_error on disconnected peer, but don’t awake
it’s sleeping threads.

> bluhm
> 
> Index: netinet6/in6_proto.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
> diff -u -p -r1.113 in6_proto.c
> --- netinet6/in6_proto.c	11 Jan 2024 14:15:12 -0000	1.113
> +++ netinet6/in6_proto.c	15 Apr 2024 19:26:36 -0000
> @@ -158,7 +158,7 @@ const struct protosw inet6sw[] = {
>   .pr_type	= SOCK_RAW,
>   .pr_domain	= &inet6domain,
>   .pr_protocol	= IPPROTO_RAW,
> -  .pr_flags	= PR_ATOMIC|PR_ADDR,
> +  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPINPUT,
>   .pr_input	= rip6_input,
>   .pr_ctlinput	= rip6_ctlinput,
>   .pr_ctloutput	= rip6_ctloutput,
> @@ -322,7 +322,7 @@ const struct protosw inet6sw[] = {
>   /* raw wildcard */
>   .pr_type	= SOCK_RAW,
>   .pr_domain	= &inet6domain,
> -  .pr_flags	= PR_ATOMIC|PR_ADDR,
> +  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPINPUT,
>   .pr_input	= rip6_input,
>   .pr_ctloutput	= rip6_ctloutput,
>   .pr_usrreqs	= &rip6_usrreqs,
> Index: netinet6/raw_ip6.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
> diff -u -p -r1.182 raw_ip6.c
> --- netinet6/raw_ip6.c	13 Feb 2024 12:22:09 -0000	1.182
> +++ netinet6/raw_ip6.c	15 Apr 2024 19:26:36 -0000
> @@ -155,9 +155,9 @@ rip6_input(struct mbuf **mp, int *offp, 
> 	} else
> 		rip6stat_inc(rip6s_ipackets);
> 
> -	bzero(&rip6src, sizeof(rip6src));
> -	rip6src.sin6_len = sizeof(struct sockaddr_in6);
> +	memset(&rip6src, 0, sizeof(rip6src));
> 	rip6src.sin6_family = AF_INET6;
> +	rip6src.sin6_len = sizeof(rip6src);
> 	/* KAME hack: recover scopeid */
> 	in6_recoverscope(&rip6src, &ip6->ip6_src);
> 
> @@ -186,7 +186,13 @@ rip6_input(struct mbuf **mp, int *offp, 
> 	TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) {
> 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
> 
> -		if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE)
> +		/*
> +		 * Packet must not be inserted after disconnected wakeup
> +		 * call.  To avoid race, check again when holding receive
> +		 * buffer mutex.
> +		 */
> +		if (ISSET(READ_ONCE(inp->inp_socket->so_rcv.sb_state),
> +		    SS_CANTRCVMORE))
> 			continue;
> 		if (rtable_l2(inp->inp_rtableid) !=
> 		    rtable_l2(m->m_pkthdr.ph_rtableid))
> @@ -264,7 +270,7 @@ rip6_input(struct mbuf **mp, int *offp, 
> 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> 		if (n != NULL) {
> 			struct socket *so = inp->inp_socket;
> -			int ret;
> +			int ret = 0;
> 
> 			if (inp->inp_flags & IN6P_CONTROLOPTS)
> 				ip6_savecontrol(inp, n, &opts);
> @@ -272,12 +278,14 @@ rip6_input(struct mbuf **mp, int *offp, 
> 			m_adj(n, *offp);
> 
> 			mtx_enter(&so->so_rcv.sb_mtx);
> -			ret = sbappendaddr(so, &so->so_rcv,
> -			    sin6tosa(&rip6src), n, opts);
> +			if (!ISSET(inp->inp_socket->so_rcv.sb_state,
> +			    SS_CANTRCVMORE)) {
> +				ret = sbappendaddr(so, &so->so_rcv,
> +				    sin6tosa(&rip6src), n, opts);
> +			}
> 			mtx_leave(&so->so_rcv.sb_mtx);
> 
> 			if (ret == 0) {
> -				/* should notify about lost packet */
> 				m_freem(n);
> 				m_freem(opts);
> 				rip6stat_inc(rip6s_fullsock);
> @@ -727,7 +735,7 @@ rip6_disconnect(struct socket *so)
> 	if ((so->so_state & SS_ISCONNECTED) == 0)
> 		return (ENOTCONN);
> 
> -	so->so_state &= ~SS_ISCONNECTED;	/* XXX */
> +	soisdisconnected(so);
> 	mtx_enter(&rawin6pcbtable.inpt_mtx);
> 	inp->inp_faddr6 = in6addr_any;
> 	mtx_leave(&rawin6pcbtable.inpt_mtx);
>