Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: Don't release NET_LOCK in vio(4) ioctl
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
Stefan Fritsch <sf@sfritsch.de>, tech@openbsd.org
Date:
Wed, 17 Apr 2024 20:59:14 +0300

Download raw body.

Thread
> On 16 Apr 2024, at 23:58, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> On Fri, Apr 12, 2024 at 03:38:44PM +0300, Vitaliy Makkoveev wrote:
>> On Fri, Apr 12, 2024 at 02:05:05PM +0200, Stefan Fritsch wrote:
>>> Hi,
>>> 
>>> commits f0b002d01d5 "Release the netlock when sleeping for control 
>>> messages in in vioioctl()" and 126b881f71 "Insert a workaround for per-ifp 
>>> ioctl being called w/o NET_LOCK()." in vio(4) fixed a deadlock but may 
>>> cause a crash with a protection fault trap at rt_ifa_del if addresses are 
>>> added/removed concurrently.
>>> 
>>> Reproducible by running
>>> 
>>> while true; do ifconfig vio0 -inet; done &
>>> while true; do ifconfig vio0 10.1.6.11/24; done &
>>> while true; do ifconfig vio0 down; done &
>>> while true; do ifconfig vio0 up; done &
>>> while true; do ifconfig vio0 -inet6 ; done &
>>> while true; do ifconfig vio0 inet6 2001:a61:3446:ddff:94be:52d1:d57c:1/64 ; done &
>>> 
>>> in parallel.
>>> 
>>> The diff below reverts the two above commits and changes the vio_ctrl_* 
>>> functions to prevent any concurrent control queue requests while the 
>>> device is being reset. I have seen no deadlocks or crashes with that 
>>> patch.
>>> 
>>> Comments? OKs?
>>> 
>> 
>> Sleep with netlock held stops packet processing and operations in inet
>> sockets. This is much worse when use another lock with netlock release.
>> pflowioctl() could be good example.
> 
> Holding net lock for a long time is not good.  But kernel crashes
> are worse.  I prefer short stop of packet processing during ifconfig
> over a crash in netinet code.  Here is the original bug report
> 
> https://marc.info/?l=openbsd-bugs&m=166124228103568&w=2
> 
> I would try get this in and work on fine grained vio(4) locking
> afterwards.
> 

I’m not blocking this, just pointing the problem.

> bluhm
> 
>>> The whole locking in vio(4) needs some overhaul, but I would postpone that 
>>> until vio(4) is unlocked and extended to multi-queue. Is there a good 
>>> example of how to be able to sleep in the ioctl path in some other network 
>>> driver? cad(4) uses a rwlock and seems to jump through quite a few hoops 
>>> to get it right. Is there a better example?
>>> 
>>> Cheers,
>>> Stefan
>>> 
>>> diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
>>> index 9ad8e36e346..633c00f96b4 100644
>>> --- a/sys/dev/pv/if_vio.c
>>> +++ b/sys/dev/pv/if_vio.c
>>> @@ -702,8 +702,10 @@ vio_stop(struct ifnet *ifp, int disable)
>>> 	/* only way to stop I/O and DMA is resetting... */
>>> 	virtio_reset(vsc);
>>> 	vio_rxeof(sc);
>>> -	if (vsc->sc_nvqs >= 3)
>>> +	if (vsc->sc_nvqs >= 3) {
>>> +		sc->sc_ctrl_inuse = RESET;
>>> 		vio_ctrleof(&sc->sc_vq[VQCTL]);
>>> +	}
>>> 	vio_tx_drain(sc);
>>> 	if (disable)
>>> 		vio_rx_drain(sc);
>>> @@ -715,8 +717,8 @@ vio_stop(struct ifnet *ifp, int disable)
>>> 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
>>> 	virtio_reinit_end(vsc);
>>> 	if (vsc->sc_nvqs >= 3) {
>>> -		if (sc->sc_ctrl_inuse != FREE)
>>> -			sc->sc_ctrl_inuse = RESET;
>>> +		KASSERT(sc->sc_ctrl_inuse == RESET);
>>> +		sc->sc_ctrl_inuse = FREE;
>>> 		wakeup(&sc->sc_ctrl_inuse);
>>> 	}
>>> }
>>> @@ -1232,13 +1234,17 @@ vio_txeof(struct virtqueue *vq)
>>> 
>>> 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
>>> 		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
>>> +		m = sc->sc_tx_mbufs[slot];
>>> +		if (m == NULL) {
>>> +			// interface is down by now
>>> +			continue;
>>> +		}
>>> 		r++;
>>> 		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
>>> 		    BUS_DMASYNC_POSTWRITE);
>>> 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
>>> 		    sc->sc_tx_dmamaps[slot]->dm_mapsize,
>>> 		    BUS_DMASYNC_POSTWRITE);
>>> -		m = sc->sc_tx_mbufs[slot];
>>> 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
>>> 		sc->sc_tx_mbufs[slot] = NULL;
>>> 		virtio_dequeue_commit(vq, slot);
>>> @@ -1363,30 +1369,13 @@ out:
>>> 	return r;
>>> }
>>> 
>>> -/*
>>> - * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
>>> - * NET_LOCK() deadlocks are possible.  So release it here.
>>> - */
>>> -static inline int
>>> -vio_sleep(struct vio_softc *sc, const char *wmesg)
>>> -{
>>> -	int status = rw_status(&netlock);
>>> -
>>> -	if (status != RW_WRITE && status != RW_READ)
>>> -		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
>>> -		    INFSLP);
>>> -
>>> -	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
>>> -	    INFSLP);
>>> -}
>>> -
>>> int
>>> vio_wait_ctrl(struct vio_softc *sc)
>>> {
>>> 	int r = 0;
>>> 
>>> 	while (sc->sc_ctrl_inuse != FREE) {
>>> -		r = vio_sleep(sc, "viowait");
>>> +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, "viowait", INFSLP);
>>> 		if (r == EINTR)
>>> 			return r;
>>> 	}
>>> @@ -1400,12 +1389,12 @@ vio_wait_ctrl_done(struct vio_softc *sc)
>>> {
>>> 	int r = 0;
>>> 
>>> -	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
>>> +	while (sc->sc_ctrl_inuse != DONE) {
>>> 		if (sc->sc_ctrl_inuse == RESET) {
>>> -			r = 1;
>>> +			r = ENXIO;
>>> 			break;
>>> 		}
>>> -		r = vio_sleep(sc, "viodone");
>>> +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, "viodone", INFSLP);
>>> 		if (r == EINTR)
>>> 			break;
>>> 	}
>>> @@ -1415,7 +1404,8 @@ vio_wait_ctrl_done(struct vio_softc *sc)
>>> void
>>> vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
>>> {
>>> -	sc->sc_ctrl_inuse = new;
>>> +	if (sc->sc_ctrl_inuse != RESET)
>>> +		sc->sc_ctrl_inuse = new;
>>> 	wakeup(&sc->sc_ctrl_inuse);
>>> }