Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: ip sysctl atomic
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 01 May 2024 13:16:37 +0200

Download raw body.

Thread
> Date: Tue, 30 Apr 2024 20:46:48 +0200
> From: Alexander Bluhm <bluhm@openbsd.org>
> 
> Hi,
> 
> As general MP safe sysctls are currently not wanted, let's implement
> it the other way around.
> 
> Access to global integer variables in net.inet.ip is made MP safe
> with read once.  Some values are passed from ip_input() to ip_fragment()
> with flags.  This keeps the view of IP processing consistent even
> if another CPU changes the sysctl value.
> 
> I kept the sysctl code as it is, but added _mpsafe functions for
> values that are used by multiple CPUs.
> 
> Using ip_defttl for IPv6 header has been fixed.  ip6_defhlim will
> be made MP safe later.
> 
> ok?

Well, you did exactly what I tried to discourage you from doing.  I
don't think this is "mpsafe" at all.  Let me explain...

> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.427 kern_sysctl.c
> --- kern/kern_sysctl.c	12 Apr 2024 16:07:09 -0000	1.427
> +++ kern/kern_sysctl.c	30 Apr 2024 17:13:22 -0000
> @@ -1021,6 +1021,41 @@ sysctl_int_bounded(void *oldp, size_t *o
>  	return (0);
>  }
>  
> +int
> +sysctl_int_bounded_mpsafe(void *oldp, size_t *oldlenp, void *newp,
> +    size_t newlen, int *valp, int minimum, int maximum)
> +{
> +	int oldval, newval;
> +	int error;
> +
> +	/* read only */
> +	if (newp != NULL && minimum > maximum)
> +		return (EPERM);
> +
> +	if (oldp && *oldlenp < sizeof(int))
> +		return (ENOMEM);
> +	if (newp && newlen != sizeof(int))
> +		return (EINVAL);
> +	*oldlenp = sizeof(int);
> +
> +	/* copyin() may sleep, call it first */

This comment will become rather misleading once we drop the vslock()
for certain sysctls like mvs@ is suggesting; copyout(9) may sleep as
well at that point.

> +	if (newp) {
> +		if ((error = copyin(newp, &newval, sizeof(int))))
> +			return (error);
> +		/* outside limits */
> +		if (newval < minimum || maximum < newval)
> +			return (EINVAL);
> +	}
> +	if (oldp) {
> +		oldval = READ_ONCE(*valp);
> +		if ((error = copyout(&oldval, oldp, sizeof(int))))
> +			return (error);
> +	}
> +	if (newp)
> +		WRITE_ONCE(*valp, newval);
> +	return (0);
> +}
> +

What does "mpsafe" mean here?  The READ_ONCE() and WRITE_ONCE() here
don't really achieve anything and from a user standpoint this could
still yield inconsistent values, where two threads changing the sysctl
value concurrently could return any "old" value.  And by that I mean
that if the value of the sysctl is initailly A, thread 1 changes it to
B and thread 2 changes it to C, the final value may end up as C, but
thread 2 might return either A or B as the old value.  A truly
"mpsafe" version should probably use atomic_swap_uint(9).  Otherwise
you might just as wel just remove the locking around the existing
sysctl_bounded_arr() call.

>  /*
>   * Array of read-only or bounded integer values.
>   */
> @@ -1036,6 +1071,24 @@ sysctl_bounded_arr(const struct sysctl_b
>  		if (valpp[i].mib == name[0]) {
>  			return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
>  			    valpp[i].var, valpp[i].minimum, valpp[i].maximum));
> +		}
> +	}
> +	return (EOPNOTSUPP);
> +}
> +
> +int
> +sysctl_bounded_arr_mpsafe(const struct sysctl_bounded_args *valpp,
> +    u_int valplen, int *name, u_int namelen, void *oldp, size_t *oldlenp,
> +    void *newp, size_t newlen)
> +{
> +	u_int i;
> +	if (namelen != 1)
> +		return (ENOTDIR);
> +	for (i = 0; i < valplen; ++i) {
> +		if (valpp[i].mib == name[0]) {
> +			return (sysctl_int_bounded_mpsafe(oldp, oldlenp, newp,
> +			    newlen, valpp[i].var, valpp[i].minimum,
> +			    valpp[i].maximum));
>  		}
>  	}
>  	return (EOPNOTSUPP);
> Index: net/if_etherip.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_etherip.c,v
> diff -u -p -r1.55 if_etherip.c
> --- net/if_etherip.c	13 Feb 2024 12:22:09 -0000	1.55
> +++ net/if_etherip.c	30 Apr 2024 15:24:04 -0000
> @@ -142,7 +142,7 @@ etherip_clone_create(struct if_clone *if
>  	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
>  	    ifc->ifc_name, unit);
>  
> -	sc->sc_ttl = ip_defttl;
> +	sc->sc_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */
>  	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
>  	sc->sc_df = htons(0);
> Index: net/if_gif.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_gif.c,v
> diff -u -p -r1.137 if_gif.c
> --- net/if_gif.c	1 Jan 2024 18:47:02 -0000	1.137
> +++ net/if_gif.c	30 Apr 2024 15:24:04 -0000
> @@ -154,7 +154,7 @@ gif_clone_create(struct if_clone *ifc, i
>  	ifp = &sc->sc_if;
>  
>  	sc->sc_df = htons(0);
> -	sc->sc_ttl = ip_defttl;
> +	sc->sc_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
>  	sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD;
>  	sc->sc_ecn = ECN_ALLOWED;
> Index: net/if_gre.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_gre.c,v
> diff -u -p -r1.178 if_gre.c
> --- net/if_gre.c	23 Dec 2023 10:52:54 -0000	1.178
> +++ net/if_gre.c	30 Apr 2024 15:24:04 -0000
> @@ -582,7 +582,7 @@ gre_clone_create(struct if_clone *ifc, i
>  	ifp->if_ioctl = gre_ioctl;
>  	ifp->if_rtrequest = p2p_rtrequest;
>  
> -	sc->sc_tunnel.t_ttl = ip_defttl;
> +	sc->sc_tunnel.t_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
>  	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
>  	sc->sc_tunnel.t_df = htons(0);
> @@ -653,7 +653,7 @@ mgre_clone_create(struct if_clone *ifc, 
>  	ifp->if_start = mgre_start;
>  	ifp->if_ioctl = mgre_ioctl;
>  
> -	sc->sc_tunnel.t_ttl = ip_defttl;
> +	sc->sc_tunnel.t_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
>  	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
>  	sc->sc_tunnel.t_df = htons(0);
> @@ -707,7 +707,7 @@ egre_clone_create(struct if_clone *ifc, 
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>  	ether_fakeaddr(ifp);
>  
> -	sc->sc_tunnel.t_ttl = ip_defttl;
> +	sc->sc_tunnel.t_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_tunnel.t_txhprio = 0;
>  	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
>  	sc->sc_tunnel.t_df = htons(0);
> @@ -842,7 +842,7 @@ eoip_clone_create(struct if_clone *ifc, 
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>  	ether_fakeaddr(ifp);
>  
> -	sc->sc_tunnel.t_ttl = ip_defttl;
> +	sc->sc_tunnel.t_ttl = READ_ONCE(ip_defttl);
>  	sc->sc_tunnel.t_txhprio = 0;
>  	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
>  	sc->sc_tunnel.t_df = htons(0);
> @@ -3006,7 +3006,8 @@ gre_keepalive_send(void *arg)
>  	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
>  	SipHash24_Final(gk->gk_digest, &ctx);
>  
> -	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;
> +	ttl = sc->sc_tunnel.t_ttl == -1 ? READ_ONCE(ip_defttl) :
> +	    sc->sc_tunnel.t_ttl;
>  
>  	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
>  	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1194 pf.c
> --- net/pf.c	12 Apr 2024 16:07:09 -0000	1.1194
> +++ net/pf.c	30 Apr 2024 16:42:13 -0000
> @@ -7960,12 +7960,17 @@ done:
>  		switch (pd.naf) {
>  		case AF_INET:
>  			if (pd.dir == PF_IN) {
> -				if (ipforwarding == 0) {
> +				int flags;
> +
> +				if (READ_ONCE(ipforwarding) == 0) {
>  					ipstat_inc(ips_cantforward);
>  					action = PF_DROP;
>  					break;
>  				}
> -				ip_forward(pd.m, ifp, NULL, 1);
> +				flags = IP_FORWARDING | IP_REDIRECT;
> +				if (READ_ONCE(ip_directedbcast))
> +					SET(flags, IP_ALLOWBROADCAST);
> +				ip_forward(pd.m, ifp, NULL, flags);
>  			} else
>  				ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
>  			break;
> Index: netinet/if_ether.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> diff -u -p -r1.267 if_ether.c
> --- netinet/if_ether.c	18 Dec 2023 13:30:44 -0000	1.267
> +++ netinet/if_ether.c	30 Apr 2024 15:33:51 -0000
> @@ -386,7 +386,7 @@ arpresolve(struct ifnet *ifp, struct rte
>  
>  		/* refresh ARP entry when timeout gets close */
>  		if (rt->rt_expire != 0 &&
> -		    rt->rt_expire - arpt_keep / 8 < uptime) {
> +		    rt->rt_expire - READ_ONCE(arpt_keep) / 8 < uptime) {
>  
>  			mtx_enter(&arp_mtx);
>  			la = (struct llinfo_arp *)rt->rt_llinfo;
> @@ -449,7 +449,7 @@ arpresolve(struct ifnet *ifp, struct rte
>  				refresh = 1;
>  			else {
>  				reject = RTF_REJECT;
> -				rt->rt_expire += arpt_down;
> +				rt->rt_expire += READ_ONCE(arpt_down);
>  				la->la_asked = 0;
>  				la->la_refreshed = 0;
>  				atomic_sub_int(&la_hold_total,
> @@ -711,7 +711,7 @@ arpcache(struct ifnet *ifp, struct ether
>  	sdl->sdl_alen = sizeof(ea->arp_sha);
>  	memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
>  	if (rt->rt_expire)
> -		rt->rt_expire = uptime + arpt_keep;
> +		rt->rt_expire = uptime + READ_ONCE(arpt_keep);
>  	rt->rt_flags &= ~RTF_REJECT;
>  
>  	/* Notify userland that an ARP resolution has been done. */
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.302 in_pcb.c
> --- netinet/in_pcb.c	19 Apr 2024 10:13:58 -0000	1.302
> +++ netinet/in_pcb.c	30 Apr 2024 15:24:04 -0000
> @@ -108,10 +108,10 @@ const union inpaddru zeroin46_addr;
>   * These configure the range of local port addresses assigned to
>   * "unspecified" outgoing connections/packets/whatever.
>   */
> -int ipport_firstauto = IPPORT_RESERVED;
> -int ipport_lastauto = IPPORT_USERRESERVED;
> -int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;
> -int ipport_hilastauto = IPPORT_HILASTAUTO;
> +int ipport_firstauto = IPPORT_RESERVED;		/* [a] */
> +int ipport_lastauto = IPPORT_USERRESERVED;	/* [a] */
> +int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* [a] */
> +int ipport_hilastauto = IPPORT_HILASTAUTO;	/* [a] */
>  
>  struct baddynamicports baddynamicports;
>  struct baddynamicports rootonlyports;
> @@ -452,16 +452,16 @@ in_pcbpickport(u_int16_t *lport, const v
>  	MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
>  
>  	if (inp->inp_flags & INP_HIGHPORT) {
> -		first = ipport_hifirstauto;	/* sysctl */
> -		last = ipport_hilastauto;
> +		first = READ_ONCE(ipport_hifirstauto);	/* sysctl */
> +		last = READ_ONCE(ipport_hilastauto);
>  	} else if (inp->inp_flags & INP_LOWPORT) {
>  		if (suser(p))
>  			return (EACCES);
>  		first = IPPORT_RESERVED-1; /* 1023 */
>  		last = 600;		   /* not IPPORT_RESERVED/2 */
>  	} else {
> -		first = ipport_firstauto;	/* sysctl */
> -		last = ipport_lastauto;
> +		first = READ_ONCE(ipport_firstauto);	/* sysctl */
> +		last = READ_ONCE(ipport_lastauto);
>  	}
>  	if (first < last) {
>  		lower = first;
> Index: netinet/ip_icmp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
> diff -u -p -r1.192 ip_icmp.c
> --- netinet/ip_icmp.c	16 Sep 2023 09:33:27 -0000	1.192
> +++ netinet/ip_icmp.c	30 Apr 2024 15:33:51 -0000
> @@ -589,7 +589,7 @@ reflect:
>  		struct sockaddr_in ssrc;
>  		struct rtentry *newrt = NULL;
>  
> -		if (icmp_rediraccept == 0 || ipforwarding == 1)
> +		if (icmp_rediraccept == 0 || READ_ONCE(ipforwarding) == 1)
>  			goto freeit;
>  		if (code > 3)
>  			goto badcode;
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.393 ip_input.c
> --- netinet/ip_input.c	16 Apr 2024 12:56:39 -0000	1.393
> +++ netinet/ip_input.c	30 Apr 2024 17:13:20 -0000
> @@ -83,13 +83,20 @@
>  #include <netinet/ip_carp.h>
>  #endif
>  
> +/*
> + * Locks used to protect global variables in this file:
> + *	I	immutable after creation
> + *	a	atomic operations
> + *	N	net lock
> + */
> +
>  /* values controllable via sysctl */
> -int	ipforwarding = 0;
> -int	ipmforwarding = 0;
> -int	ipmultipath = 0;
> -int	ipsendredirects = 1;
> +int	ipforwarding = 0;			/* [a] */
> +int	ipmforwarding = 0;			/* [a] */
> +int	ipmultipath = 0;			/* [N] */
> +int	ipsendredirects = 1;			/* [a] */
>  int	ip_dosourceroute = 0;
> -int	ip_defttl = IPDEFTTL;
> +int	ip_defttl = IPDEFTTL;			/* [a] */
>  int	ip_mtudisc = 1;
>  int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
>  int	ip_directedbcast = 0;
> @@ -101,7 +108,7 @@ struct mutex	ipq_mutex = MUTEX_INITIALIZ
>  LIST_HEAD(, ipq) ipq;
>  
>  /* Keep track of memory used for reassembly */
> -int	ip_maxqueue = 300;
> +int	ip_maxqueue = 300;	/* [a] maximum fragments in reassembly queue */
>  int	ip_frags = 0;
>  
>  const struct sysctl_bounded_args ipctl_vars[] = {
> @@ -137,8 +144,8 @@ static struct mbuf_queue	ipsendraw_mq;
>  extern struct niqueue		arpinq;
>  
>  int	ip_ours(struct mbuf **, int *, int, int);
> -int	ip_dooptions(struct mbuf *, struct ifnet *);
> -int	in_ouraddr(struct mbuf *, struct ifnet *, struct route *);
> +int	ip_dooptions(struct mbuf *, struct ifnet *, int);
> +int	in_ouraddr(struct mbuf *, struct ifnet *, struct route *, int);
>  
>  int		ip_fragcheck(struct mbuf **, int *);
>  struct mbuf *	ip_reass(struct ipqent *, struct ipq *);
> @@ -431,7 +438,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  #if NPF > 0
>  	struct in_addr odst;
>  #endif
> -	int pfrdr = 0;
> +	int flags = 0;
>  
>  	KASSERT(*offp == 0);
>  
> @@ -461,9 +468,15 @@ ip_input_if(struct mbuf **mp, int *offp,
>  		goto bad;
>  
>  	ip = mtod(m, struct ip *);
> -	pfrdr = odst.s_addr != ip->ip_dst.s_addr;
> +	if (odst.s_addr != ip->ip_dst.s_addr)
> +		SET(flags, IP_REDIRECT);
>  #endif
>  
> +	if (READ_ONCE(ipforwarding) != 0)
> +		SET(flags, IP_FORWARDING);
> +	if (READ_ONCE(ip_directedbcast))
> +		SET(flags, IP_ALLOWBROADCAST);
> +
>  	hlen = ip->ip_hl << 2;
>  
>  	/*
> @@ -472,7 +485,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  	 * error was detected (causing an icmp message
>  	 * to be sent and the original packet to be freed).
>  	 */
> -	if (hlen > sizeof (struct ip) && ip_dooptions(m, ifp)) {
> +	if (hlen > sizeof (struct ip) && ip_dooptions(m, ifp, flags)) {
>  		m = *mp = NULL;
>  		goto bad;
>  	}
> @@ -483,7 +496,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  		goto out;
>  	}
>  
> -	switch(in_ouraddr(m, ifp, &ro)) {
> +	switch(in_ouraddr(m, ifp, &ro, flags)) {
>  	case 2:
>  		goto bad;
>  	case 1:
> @@ -500,7 +513,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  		m->m_flags |= M_MCAST;
>  
>  #ifdef MROUTING
> -		if (ipmforwarding && ip_mrouter[ifp->if_rdomain]) {
> +		if (READ_ONCE(ipmforwarding) && ip_mrouter[ifp->if_rdomain]) {
>  			int error;
>  
>  			if (m->m_flags & M_EXT) {
> @@ -565,7 +578,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  	/*
>  	 * Not for us; forward if possible and desirable.
>  	 */
> -	if (ipforwarding == 0) {
> +	if (!ISSET(flags, IP_FORWARDING)) {
>  		ipstat_inc(ips_cantforward);
>  		goto bad;
>  	}
> @@ -585,7 +598,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  	}
>  #endif /* IPSEC */
>  
> -	ip_forward(m, ifp, &ro, pfrdr);
> +	ip_forward(m, ifp, &ro, flags);
>  	*mp = NULL;
>  	return IPPROTO_DONE;
>   bad:
> @@ -666,7 +679,7 @@ ip_fragcheck(struct mbuf **mp, int *offp
>  		 */
>  		if (mff || ip->ip_off) {
>  			ipstat_inc(ips_fragments);
> -			if (ip_frags + 1 > ip_maxqueue) {
> +			if (ip_frags + 1 > READ_ONCE(ip_maxqueue)) {
>  				ip_flush();
>  				ipstat_inc(ips_rcvmemdrop);
>  				goto bad;
> @@ -806,7 +819,7 @@ ip_deliver(struct mbuf **mp, int *offp, 
>  #undef IPSTAT_INC
>  
>  int
> -in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct route *ro)
> +in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct route *ro, int flags)
>  {
>  	struct rtentry		*rt;
>  	struct ip		*ip;
> @@ -836,7 +849,8 @@ in_ouraddr(struct mbuf *m, struct ifnet 
>  		 * if it is received on the interface with that address.
>  		 */
>  		if (ISSET(rt->rt_flags, RTF_BROADCAST) &&
> -		    (!ip_directedbcast || rt->rt_ifidx == ifp->if_index)) {
> +		    (!ISSET(flags, IP_ALLOWBROADCAST) ||
> +		    rt->rt_ifidx == ifp->if_index)) {
>  			match = 1;
>  
>  			/* Make sure M_BCAST is set */
> @@ -875,7 +889,8 @@ in_ouraddr(struct mbuf *m, struct ifnet 
>  				break;
>  			}
>  		}
> -	} else if (ipforwarding == 0 && rt->rt_ifidx != ifp->if_index &&
> +	} else if (!ISSET(flags, IP_FORWARDING) &&
> +	    rt->rt_ifidx != ifp->if_index &&
>  	    !((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_type == IFT_ENC) ||
>  	    (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST))) {
>  		/* received on wrong interface. */
> @@ -1131,11 +1146,13 @@ ip_slowtimo(void)
>  void
>  ip_flush(void)
>  {
> -	int max = 50;
> +	int limit, max = 50;
>  
>  	MUTEX_ASSERT_LOCKED(&ipq_mutex);
>  
> -	while (!LIST_EMPTY(&ipq) && ip_frags > ip_maxqueue * 3 / 4 && --max) {
> +	limit = READ_ONCE(ip_maxqueue) * 3 / 4;
> +
> +	while (!LIST_EMPTY(&ipq) && ip_frags > limit && --max) {
>  		ipstat_inc(ips_fragdropped);
>  		ip_freef(LIST_FIRST(&ipq));
>  	}
> @@ -1149,7 +1166,7 @@ ip_flush(void)
>   * 0 if the packet should be processed further.
>   */
>  int
> -ip_dooptions(struct mbuf *m, struct ifnet *ifp)
> +ip_dooptions(struct mbuf *m, struct ifnet *ifp, int flags)
>  {
>  	struct ip *ip = mtod(m, struct ip *);
>  	unsigned int rtableid = m->m_pkthdr.ph_rtableid;
> @@ -1370,8 +1387,8 @@ ip_dooptions(struct mbuf *m, struct ifne
>  		}
>  	}
>  	KERNEL_UNLOCK();
> -	if (forward && ipforwarding > 0) {
> -		ip_forward(m, ifp, NULL, 1);
> +	if (forward && ISSET(flags, IP_FORWARDING)) {
> +		ip_forward(m, ifp, NULL, flags | IP_REDIRECT);
>  		return (1);
>  	}
>  	return (0);
> @@ -1521,7 +1538,7 @@ const u_char inetctlerrmap[PRC_NCMDS] = 
>   * via a source route.
>   */
>  void
> -ip_forward(struct mbuf *m, struct ifnet *ifp, struct route *ro, int srcrt)
> +ip_forward(struct mbuf *m, struct ifnet *ifp, struct route *ro, int flags)
>  {
>  	struct mbuf mfake, *mcopy;
>  	struct ip *ip = mtod(m, struct ip *);
> @@ -1587,7 +1604,7 @@ ip_forward(struct mbuf *m, struct ifnet 
>  	if ((rt->rt_ifidx == ifp->if_index) &&
>  	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
>  	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
> -	    ipsendredirects && !srcrt &&
> +	    READ_ONCE(ipsendredirects) && !ISSET(flags, IP_REDIRECT) &&
>  	    !arpproxy(satosin(rt_key(rt))->sin_addr, m->m_pkthdr.ph_rtableid)) {
>  		if ((ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_netmask) ==
>  		    ifatoia(rt->rt_ifa)->ia_net) {
> @@ -1601,9 +1618,7 @@ ip_forward(struct mbuf *m, struct ifnet 
>  		}
>  	}
>  
> -	error = ip_output(m, NULL, ro,
> -	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
> -	    NULL, NULL, 0);
> +	error = ip_output(m, NULL, ro, flags | IP_FORWARDING, NULL, NULL, 0);
>  	rt = ro->ro_rt;
>  	if (error)
>  		ipstat_inc(ips_cantforward);
> @@ -1780,10 +1795,9 @@ ip_sysctl(int *name, u_int namelen, void
>  		NET_UNLOCK();
>  		return (error);
>  	default:
> -		NET_LOCK();
> -		error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
> -		    name, namelen, oldp, oldlenp, newp, newlen);
> -		NET_UNLOCK();
> +		error = sysctl_bounded_arr_mpsafe(ipctl_vars,
> +		    nitems(ipctl_vars), name, namelen, oldp, oldlenp, newp,
> +		    newlen);
>  		return (error);
>  	}
>  	/* NOTREACHED */
> Index: netinet/ip_ipip.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipip.c,v
> diff -u -p -r1.101 ip_ipip.c
> --- netinet/ip_ipip.c	11 Feb 2024 01:27:45 -0000	1.101
> +++ netinet/ip_ipip.c	30 Apr 2024 15:24:04 -0000
> @@ -381,7 +381,7 @@ ipip_output(struct mbuf **mp, struct tdb
>  		ipo->ip_v = IPVERSION;
>  		ipo->ip_hl = 5;
>  		ipo->ip_len = htons(m->m_pkthdr.len);
> -		ipo->ip_ttl = ip_defttl;
> +		ipo->ip_ttl = READ_ONCE(ip_defttl);
>  		ipo->ip_sum = 0;
>  		ipo->ip_src = tdb->tdb_src.sin.sin_addr;
>  		ipo->ip_dst = tdb->tdb_dst.sin.sin_addr;
> @@ -481,7 +481,7 @@ ipip_output(struct mbuf **mp, struct tdb
>  		ip6o->ip6_vfc &= ~IPV6_VERSION_MASK;
>  		ip6o->ip6_vfc |= IPV6_VERSION;
>  		ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6o));
> -		ip6o->ip6_hlim = ip_defttl;
> +		ip6o->ip6_hlim = ip6_defhlim;
>  		in6_embedscope(&ip6o->ip6_src, &tdb->tdb_src.sin6, NULL, NULL);
>  		in6_embedscope(&ip6o->ip6_dst, &tdb->tdb_dst.sin6, NULL, NULL);
>  
> Index: netinet/ip_mroute.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
> diff -u -p -r1.142 ip_mroute.c
> --- netinet/ip_mroute.c	6 Apr 2024 14:23:27 -0000	1.142
> +++ netinet/ip_mroute.c	30 Apr 2024 15:24:04 -0000
> @@ -98,7 +98,7 @@ int mcast_debug = 1;
>  struct socket	*ip_mrouter[RT_TABLEID_MAX + 1];
>  struct rttimer_queue ip_mrouterq;
>  uint64_t	 mrt_count[RT_TABLEID_MAX + 1];
> -int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
> +int		ip_mrtproto = IGMP_DVMRP;    /* [I] for netstat only */
>  
>  struct mrtstat	mrtstat;
>  
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.398 ip_output.c
> --- netinet/ip_output.c	17 Apr 2024 20:48:51 -0000	1.398
> +++ netinet/ip_output.c	30 Apr 2024 15:33:51 -0000
> @@ -326,7 +326,8 @@ reroute:
>  			 * above, will be forwarded by the ip_input() routine,
>  			 * if necessary.
>  			 */
> -			if (ipmforwarding && ip_mrouter[ifp->if_rdomain] &&
> +			if (READ_ONCE(ipmforwarding) &&
> +			    ip_mrouter[ifp->if_rdomain] &&
>  			    (flags & IP_FORWARDING) == 0) {
>  				int rv;
>  
> @@ -428,7 +429,8 @@ sendit:
>  #endif
>  
>  #ifdef IPSEC
> -	if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&
> +	if (ipsec_in_use && (flags & IP_FORWARDING) &&
> +	    (READ_ONCE(ipforwarding) == 2) &&
>  	    (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) {
>  		error = EHOSTUNREACH;
>  		goto bad;
> @@ -907,7 +909,8 @@ ip_ctloutput(int op, struct socket *so, 
>  					if (optval > 0 && optval <= MAXTTL)
>  						inp->inp_ip.ip_ttl = optval;
>  					else if (optval == -1)
> -						inp->inp_ip.ip_ttl = ip_defttl;
> +						inp->inp_ip.ip_ttl =	
> +						    READ_ONCE(ip_defttl);
>  					else
>  						error = EINVAL;
>  					break;
> @@ -1123,7 +1126,7 @@ ip_ctloutput(int op, struct socket *so, 
>  				break;
>  
>  			case IP_IPDEFTTL:
> -				optval = ip_defttl;
> +				optval = READ_ONCE(ip_defttl);
>  				break;
>  
>  #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
> Index: netinet/ip_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
> diff -u -p -r1.117 ip_var.h
> --- netinet/ip_var.h	17 Apr 2024 20:48:51 -0000	1.117
> +++ netinet/ip_var.h	30 Apr 2024 16:41:15 -0000
> @@ -206,6 +206,7 @@ struct ipoffnxt {
>  /* flags passed to ip_output */
>  #define	IP_FORWARDING		0x1		/* most of ip header exists */
>  #define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
> +#define	IP_REDIRECT		0x4	/* redirected by pf or source route */
>  #define	IP_ALLOWBROADCAST	SO_BROADCAST	/* can send broadcast packets */
>  #define	IP_MTUDISC		0x0800		/* pmtu discovery, set DF */
>  
> @@ -226,6 +227,7 @@ extern int ipforwarding;		/* enable IP f
>  extern int ipmforwarding;		/* enable multicast forwarding */
>  #endif
>  extern int ipmultipath;			/* enable multipath routing */
> +extern int ip_directedbcast;		/* accept all broadcast packets */
>  extern unsigned int la_hold_total;
>  
>  extern const struct pr_usrreqs rip_usrreqs;
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.405 tcp_input.c
> --- netinet/tcp_input.c	17 Apr 2024 20:48:51 -0000	1.405
> +++ netinet/tcp_input.c	30 Apr 2024 15:24:04 -0000
> @@ -4143,7 +4143,7 @@ syn_cache_respond(struct syn_cache *sc, 
>  	switch (sc->sc_src.sa.sa_family) {
>  	case AF_INET:
>  		ip->ip_len = htons(tlen);
> -		ip->ip_ttl = inp ? inp->inp_ip.ip_ttl : ip_defttl;
> +		ip->ip_ttl = inp ? inp->inp_ip.ip_ttl : READ_ONCE(ip_defttl);
>  		if (inp != NULL)
>  			ip->ip_tos = inp->inp_ip.ip_tos;
>  
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.201 tcp_subr.c
> --- netinet/tcp_subr.c	17 Apr 2024 20:48:51 -0000	1.201
> +++ netinet/tcp_subr.c	30 Apr 2024 15:24:04 -0000
> @@ -411,7 +411,7 @@ tcp_respond(struct tcpcb *tp, caddr_t te
>  #endif /* INET6 */
>  	case AF_INET:
>  		ip->ip_len = htons(tlen);
> -		ip->ip_ttl = ip_defttl;
> +		ip->ip_ttl = READ_ONCE(ip_defttl);
>  		ip->ip_tos = 0;
>  		ip_output(m, NULL,
>  		    tp ? &tp->t_inpcb->inp_route : NULL,
> @@ -471,7 +471,7 @@ tcp_newtcpcb(struct inpcb *inp, int wait
>  #endif
>  	{
>  		tp->pf = PF_INET;
> -		inp->inp_ip.ip_ttl = ip_defttl;
> +		inp->inp_ip.ip_ttl = READ_ONCE(ip_defttl);
>  	}
>  
>  	inp->inp_ppcb = (caddr_t)tp;
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.320 udp_usrreq.c
> --- netinet/udp_usrreq.c	17 Apr 2024 20:48:51 -0000	1.320
> +++ netinet/udp_usrreq.c	30 Apr 2024 15:24:04 -0000
> @@ -1121,7 +1121,7 @@ udp_attach(struct socket *so, int proto,
>  		sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim;
>  	else
>  #endif
> -		sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl;
> +		sotoinpcb(so)->inp_ip.ip_ttl = READ_ONCE(ip_defttl);
>  	return 0;
>  }
>  
> Index: sys/sysctl.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/sysctl.h,v
> diff -u -p -r1.235 sysctl.h
> --- sys/sysctl.h	1 Oct 2023 15:58:12 -0000	1.235
> +++ sys/sysctl.h	30 Apr 2024 17:13:49 -0000
> @@ -1059,7 +1059,11 @@ int sysctl_int(void *, size_t *, void *,
>  int sysctl_rdint(void *, size_t *, void *, int);
>  int sysctl_securelevel_int(void *, size_t *, void *, size_t, int *);
>  int sysctl_int_bounded(void *, size_t *, void *, size_t, int *, int, int);
> +int sysctl_int_bounded_mpsafe(void *, size_t *, void *, size_t, int *, int,
> +    int);
>  int sysctl_bounded_arr(const struct sysctl_bounded_args *, u_int,
> +    int *, u_int, void *, size_t *, void *, size_t);
> +int sysctl_bounded_arr_mpsafe(const struct sysctl_bounded_args *, u_int,
>      int *, u_int, void *, size_t *, void *, size_t);
>  int sysctl_quad(void *, size_t *, void *, size_t, int64_t *);
>  int sysctl_rdquad(void *, size_t *, void *, int64_t);
> 
>