From: Alexander Bluhm Subject: raw IPv6 in parallel To: tech@openbsd.org Date: Mon, 15 Apr 2024 23:25:34 +0200 Hi, This diff brings rip6_input() in line with shared net lock rip6_input(). The fields inp_icmp6filt and inp_cksum6 are protected by exclusive net lock. I have a follow up diff to tune them and document this. IPv4 function rip_disconnect() calls soisdisconnected() and I think IPv6 should do this, too. Other BSDs are confusing. NetBSD rip_disconnect() calls soisdisconnected(), but rip6_disconnect() only clears SS_ISCONNECTED. FreeBSD rip_disconnect() clears SS_ISCONNECTED, but rip6_disconnect() calls soisdisconnected(), so it is the other way around. Consistent would be to always call soisdisconnected(). The reuslt is that SS_CANTRCVMORE and SS_CANTSENDMORE are set and subsequent read and write result in EOF or EPIPE. 4.4BSD allows reconnect for UDP, but not for raw IP. I think we should use the same behavior for IPv6. ok? bluhm Index: netinet6/in6_proto.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v diff -u -p -r1.113 in6_proto.c --- netinet6/in6_proto.c 11 Jan 2024 14:15:12 -0000 1.113 +++ netinet6/in6_proto.c 15 Apr 2024 19:26:36 -0000 @@ -158,7 +158,7 @@ const struct protosw inet6sw[] = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_RAW, - .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPINPUT, .pr_input = rip6_input, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, @@ -322,7 +322,7 @@ const struct protosw inet6sw[] = { /* raw wildcard */ .pr_type = SOCK_RAW, .pr_domain = &inet6domain, - .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPINPUT, .pr_input = rip6_input, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs, Index: netinet6/raw_ip6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v diff -u -p -r1.182 raw_ip6.c --- netinet6/raw_ip6.c 13 Feb 2024 12:22:09 -0000 1.182 +++ netinet6/raw_ip6.c 15 Apr 2024 19:26:36 -0000 @@ -155,9 +155,9 @@ rip6_input(struct mbuf **mp, int *offp, } else rip6stat_inc(rip6s_ipackets); - bzero(&rip6src, sizeof(rip6src)); - rip6src.sin6_len = sizeof(struct sockaddr_in6); + memset(&rip6src, 0, sizeof(rip6src)); rip6src.sin6_family = AF_INET6; + rip6src.sin6_len = sizeof(rip6src); /* KAME hack: recover scopeid */ in6_recoverscope(&rip6src, &ip6->ip6_src); @@ -186,7 +186,13 @@ rip6_input(struct mbuf **mp, int *offp, TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) { KASSERT(ISSET(inp->inp_flags, INP_IPV6)); - if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE) + /* + * Packet must not be inserted after disconnected wakeup + * call. To avoid race, check again when holding receive + * buffer mutex. + */ + if (ISSET(READ_ONCE(inp->inp_socket->so_rcv.sb_state), + SS_CANTRCVMORE)) continue; if (rtable_l2(inp->inp_rtableid) != rtable_l2(m->m_pkthdr.ph_rtableid)) @@ -264,7 +270,7 @@ rip6_input(struct mbuf **mp, int *offp, n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) { struct socket *so = inp->inp_socket; - int ret; + int ret = 0; if (inp->inp_flags & IN6P_CONTROLOPTS) ip6_savecontrol(inp, n, &opts); @@ -272,12 +278,14 @@ rip6_input(struct mbuf **mp, int *offp, m_adj(n, *offp); mtx_enter(&so->so_rcv.sb_mtx); - ret = sbappendaddr(so, &so->so_rcv, - sin6tosa(&rip6src), n, opts); + if (!ISSET(inp->inp_socket->so_rcv.sb_state, + SS_CANTRCVMORE)) { + ret = sbappendaddr(so, &so->so_rcv, + sin6tosa(&rip6src), n, opts); + } mtx_leave(&so->so_rcv.sb_mtx); if (ret == 0) { - /* should notify about lost packet */ m_freem(n); m_freem(opts); rip6stat_inc(rip6s_fullsock); @@ -727,7 +735,7 @@ rip6_disconnect(struct socket *so) if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); - so->so_state &= ~SS_ISCONNECTED; /* XXX */ + soisdisconnected(so); mtx_enter(&rawin6pcbtable.inpt_mtx); inp->inp_faddr6 = in6addr_any; mtx_leave(&rawin6pcbtable.inpt_mtx);