Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: raw IPv6 input loop iterator
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
OpenBSD Tech <tech@openbsd.org>
Date:
Fri, 8 Nov 2024 02:55:32 +0300

Download raw body.

Thread
> On 8 Nov 2024, at 01:30, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> Hi,
> 
> Here is the final part that implements inpcb iterator for rip6
> input.  While there, make rip_input() look more like rip6_input().
> 
> ok?
> 

ok mvs

> bluhm
> 
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.453 kern_sysctl.c
> --- kern/kern_sysctl.c	5 Nov 2024 22:44:20 -0000	1.453
> +++ kern/kern_sysctl.c	7 Nov 2024 22:28:06 -0000
> @@ -1714,8 +1714,11 @@ sysctl_file(int *name, u_int namelen, ch
> #ifdef INET6
> 			mtx_enter(&rawin6pcbtable.inpt_mtx);
> 			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
> -			    inp_queue)
> +			    inp_queue) {
> +				if (in_pcb_is_iterator(inp))
> +					continue;
> 				FILLSO(inp->inp_socket);
> +			}
> 			mtx_leave(&rawin6pcbtable.inpt_mtx);
> #endif
> 			NET_UNLOCK();
> Index: netinet/raw_ip.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
> diff -u -p -r1.161 raw_ip.c
> --- netinet/raw_ip.c	5 Nov 2024 22:44:20 -0000	1.161
> +++ netinet/raw_ip.c	7 Nov 2024 22:28:06 -0000
> @@ -136,8 +136,6 @@ rip_input(struct mbuf **mp, int *offp, i
> 	struct inpcb_iterator iter = { .inp_table = NULL };
> 	struct inpcb *inp, *last;
> 	struct in_addr *key;
> -	struct counters_ref ref;
> -	uint64_t *counters;
> 	struct sockaddr_in ripsrc;
> 
> 	KASSERT(af == AF_INET);
> @@ -209,12 +207,15 @@ rip_input(struct mbuf **mp, int *offp, i
> 	mtx_leave(&rawcbtable.inpt_mtx);
> 
> 	if (last == NULL) {
> -		if (ip->ip_p != IPPROTO_ICMP)
> +		struct counters_ref ref;
> +		uint64_t *counters;
> +
> +		if (ip->ip_p == IPPROTO_ICMP) {
> +			m_freem(m);
> +		} else {
> 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
> 			    0, 0);
> -		else
> -			m_freem(m);
> -
> +		}
> 		counters = counters_enter(&ref, ipcounters);
> 		counters[ips_noproto]++;
> 		counters[ips_delivered]--;
> Index: netinet6/raw_ip6.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
> diff -u -p -r1.185 raw_ip6.c
> --- netinet6/raw_ip6.c	12 Jul 2024 19:50:35 -0000	1.185
> +++ netinet6/raw_ip6.c	7 Nov 2024 22:28:06 -0000
> @@ -118,6 +118,9 @@ const struct pr_usrreqs rip6_usrreqs = {
> 	.pru_peeraddr	= in6_peeraddr,
> };
> 
> +void	rip6_sbappend(struct inpcb *, struct mbuf *, struct ip6_hdr *, int,
> +	    struct sockaddr_in6 *);
> +
> /*
>  * Initialize raw connection block queue.
>  */
> @@ -133,8 +136,8 @@ rip6_input(struct mbuf **mp, int *offp, 
> {
> 	struct mbuf *m = *mp;
> 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
> -	struct inpcb *inp;
> -	SIMPLEQ_HEAD(, inpcb) inpcblist;
> +	struct inpcb_iterator iter = { .inp_table = NULL };
> +	struct inpcb *inp, *last;
> 	struct in6_addr *key;
> 	struct sockaddr_in6 rip6src;
> 	uint8_t type;
> @@ -177,10 +180,9 @@ rip6_input(struct mbuf **mp, int *offp, 
> 		}
> 	}
> #endif
> -	SIMPLEQ_INIT(&inpcblist);
> -	rw_enter_write(&rawin6pcbtable.inpt_notify);
> 	mtx_enter(&rawin6pcbtable.inpt_mtx);
> -	TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) {
> +	last = inp = NULL;
> +	while ((inp = in_pcb_iterator(&rawin6pcbtable, inp, &iter)) != NULL) {
> 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
> 
> 		/*
> @@ -226,17 +228,26 @@ rip6_input(struct mbuf **mp, int *offp, 
> 			}
> 		}
> 
> -		in_pcbref(inp);
> -		SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
> +		if (last != NULL) {
> +			struct mbuf *n;
> +
> +			mtx_leave(&rawin6pcbtable.inpt_mtx);
> +
> +			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> +			if (n != NULL)
> +				rip6_sbappend(last, n, ip6, *offp, &rip6src);
> +			in_pcbunref(last);
> +
> +			mtx_enter(&rawin6pcbtable.inpt_mtx);
> +		}
> +		last = in_pcbref(inp);
> 	}
> 	mtx_leave(&rawin6pcbtable.inpt_mtx);
> 
> -	if (SIMPLEQ_EMPTY(&inpcblist)) {
> +	if (last == NULL) {
> 		struct counters_ref ref;
> 		uint64_t *counters;
> 
> -		rw_exit_write(&rawin6pcbtable.inpt_notify);
> -
> 		if (proto != IPPROTO_ICMPV6) {
> 			rip6stat_inc(rip6s_nosock);
> 			if (m->m_flags & M_MCAST)
> @@ -257,43 +268,36 @@ rip6_input(struct mbuf **mp, int *offp, 
> 		return IPPROTO_DONE;
> 	}
> 
> -	while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> -		struct mbuf *n, *opts = NULL;
> -
> -		SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> -		if (SIMPLEQ_EMPTY(&inpcblist))
> -			n = m;
> -		else
> -			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> -		if (n != NULL) {
> -			struct socket *so = inp->inp_socket;
> -			int ret = 0;
> -
> -			if (inp->inp_flags & IN6P_CONTROLOPTS)
> -				ip6_savecontrol(inp, n, &opts);
> -			/* strip intermediate headers */
> -			m_adj(n, *offp);
> -
> -			mtx_enter(&so->so_rcv.sb_mtx);
> -			if (!ISSET(inp->inp_socket->so_rcv.sb_state,
> -			    SS_CANTRCVMORE)) {
> -				ret = sbappendaddr(so, &so->so_rcv,
> -				    sin6tosa(&rip6src), n, opts);
> -			}
> -			mtx_leave(&so->so_rcv.sb_mtx);
> -
> -			if (ret == 0) {
> -				m_freem(n);
> -				m_freem(opts);
> -				rip6stat_inc(rip6s_fullsock);
> -			} else
> -				sorwakeup(so);
> -		}
> -		in_pcbunref(inp);
> -	}
> -	rw_exit_write(&rawin6pcbtable.inpt_notify);
> +	rip6_sbappend(last, m, ip6, *offp, &rip6src);
> +	in_pcbunref(last);
> 
> 	return IPPROTO_DONE;
> +}
> +
> +void
> +rip6_sbappend(struct inpcb *inp, struct mbuf *m, struct ip6_hdr *ip6, int hlen,
> +    struct sockaddr_in6 *rip6src)
> +{
> +	struct socket *so = inp->inp_socket;
> +	struct mbuf *opts = NULL;
> +	int ret = 0;
> +
> +	if (inp->inp_flags & IN6P_CONTROLOPTS)
> +		ip6_savecontrol(inp, m, &opts);
> +	/* strip intermediate headers */
> +	m_adj(m, hlen);
> +
> +	mtx_enter(&so->so_rcv.sb_mtx);
> +	if (!ISSET(inp->inp_socket->so_rcv.sb_state, SS_CANTRCVMORE))
> +		ret = sbappendaddr(so, &so->so_rcv, sin6tosa(rip6src), m, opts);
> +	mtx_leave(&so->so_rcv.sb_mtx);
> +
> +	if (ret == 0) {
> +		m_freem(m);
> +		m_freem(opts);
> +		rip6stat_inc(rip6s_fullsock);
> +	} else
> +		sorwakeup(so);
> }
> 
> void
>