Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
Re: sys/uvideo: avoid using queue.h
To:
tech@openbsd.org, mglocker@openbsd.org
Date:
Fri, 14 Mar 2025 14:16:23 +0100

Download raw body.

Thread
  • Kirill A. Korinsky:

    sys/uvideo: avoid using queue.h

  • On Fri, 14 Mar 2025 14:10:01 +0100,
    Kirill A. Korinsky <kirill@korins.ky> wrote:
    > 
    > On Thu, 06 Mar 2025 09:36:46 +0100,
    > Kirill A. Korinsky <kirill@korins.ky> wrote:
    > >
    > > tech@,
    > >
    > > macroses from queue.h doesn't thread safe and wraping it by mutex on
    > > uvideo use case seems isn't so good idea.
    > >
    > > It has a hardcoded number of used buffers as 8 and already has a code
    > > which iterates against them to find the buffer which is ready for
    > > queueing in uvideo_mmap_getbuf.
    > >
    > > Here I'd like to replace SIMPLEQ_* by similar logic.
    > >
    > > It fixes two bugs:
    > >
    > > 1. If isochronous webcam sends two frames in one transfer,
    > > SIMPLEQ_INSERT_TAIL adds the same element twice and creates infinity
    > > loop inside sc_mmap_q which will be iterating in uvideo_vs_free_frame.
    > >
    > > 2. Prevents from calling SIMPLEQ_INSERT_TAIL and SIMPLEQ_REMOVE_HEAD for
    > > the same element in the queue which may lead to unexpected state.
    > >
    > > This diff was suceffuly tested on:
    > >  - Elgato Facecam Pro (isochronous)
    > >  - Jabra PanaCast (bulk)
    > >
    > 
    > Here an alternative approach where I had implemented Multi Producer Multi
    > Consumer, wait-free and fixed size FIFO queue which is thread safe.
    > 
    > I had replaced both for-loop to it, and it works.
    > 
    > It can be done faster by replacing % (reminder) by mask but it requires to
    > change UVIDEO_MAX_BUFFERS to something in form 2^x - 1.
    > 
    > Use 7 as UVIDEO_MAX_BUFFERS is safe in term of memory usage, but at least
    > ffmpeg had 8 as hardcoded constant which changes its behaviour, if I recall
    > rigth.
    > 
    > Put 15 should be safe as well, but 4K frames allocates ~16 Mb memory and it
    > turns to ~240Mb for mmap memory. It should be fine for amd64 and arm64, but
    > I have no idea about other architectures.
    > 
    > So, here reminder.
    > 
    > Regarding the queue algorithm itself.
    > 
    > I don't know who invented it on the first place, but this is a version which
    > I had invented independently in 2016 or 2017 for some large Java app, and
    > which is uses it under load for years without any knwon to me issues.
    > 
    > So, it's fine to be licensed under ISC and as far as I know it isn't
    > patented, but I haven't done any patent search.
    > 
    > But it is quite trivial, and I assume that it is well known algorithm
    > for someone.
    > 
    > Any test and feedback is welcome.
    >
    
    Ooops, I had sent a wrong diff. Here the right one.
    
    Index: sys/dev/usb/uvideo.c
    ===================================================================
    RCS file: /home/cvs/src/sys/dev/usb/uvideo.c,v
    diff -u -p -r1.252 uvideo.c
    --- sys/dev/usb/uvideo.c	12 Mar 2025 14:08:51 -0000	1.252
    +++ sys/dev/usb/uvideo.c	14 Mar 2025 13:11:08 -0000
    @@ -24,6 +24,7 @@
     #include <sys/stat.h>
     #include <sys/kthread.h>
     #include <sys/stdint.h>
    +#include <sys/atomic.h>
     
     #include <uvm/uvm_extern.h>
     
    @@ -50,6 +51,20 @@ int uvideo_debug = 1;
     #define byteof(x) ((x) >> 3)
     #define bitof(x)  (1L << ((x) & 0x7))
     
    +/* queue needs at least one empty element, to avoid inifity loop */
    +#define UVIDEO_MMAP_QUEUE_SIZE	(UVIDEO_MAX_BUFFERS + 1)
    +
    +struct uvideo_mmap_queue_item {
    +	unsigned long				 qi_seq;
    +	int					 qi_idx;
    +};
    +
    +struct uvideo_mmap_queue {
    +	struct uvideo_mmap_queue_item		 q_items[UVIDEO_MMAP_QUEUE_SIZE];
    +	unsigned long				 q_head_seq;
    +	unsigned long				 q_tail_seq;
    +};
    +
     struct uvideo_softc {
     	struct device				 sc_dev;
     	struct usbd_device			*sc_udev;
    @@ -68,10 +83,12 @@ struct uvideo_softc {
     	struct uvideo_mmap			 sc_mmap[UVIDEO_MAX_BUFFERS];
     	uint8_t					*sc_mmap_buffer;
     	size_t					 sc_mmap_buffer_size;
    -	q_mmap					 sc_mmap_q;
     	int					 sc_mmap_count;
     	int					 sc_mmap_flag;
     
    +	struct uvideo_mmap_queue		 sc_mmap_q_in;
    +	struct uvideo_mmap_queue		 sc_mmap_q_out;
    +
     	struct vnode				*sc_vp;
     	struct usb_task				 sc_task_write;
     
    @@ -173,6 +190,8 @@ void		uvideo_vs_decode_stream_header(str
     void		uvideo_vs_decode_stream_header_isight(struct uvideo_softc *,
     		    uint8_t *, int);
     void		uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int);
    +int		uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *);
    +int		uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *, int);
     void		uvideo_read(struct uvideo_softc *, uint8_t *, int);
     usbd_status	uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t,
     		    uint16_t, uint8_t *, size_t);
    @@ -669,8 +688,6 @@ uvideo_attach_hook(struct device *self)
     	if (error != USBD_NORMAL_COMPLETION)
     		return;
     
    -	/* init mmap queue */
    -	SIMPLEQ_INIT(&sc->sc_mmap_q);
     	sc->sc_mmap_count = 0;
     
     	DPRINTF(1, "uvideo_attach: doing video_attach_mi\n");
    @@ -1960,9 +1977,6 @@ uvideo_vs_free_frame(struct uvideo_softc
     		sc->sc_mmap_buffer_size = 0;
     	}
     
    -	while (!SIMPLEQ_EMPTY(&sc->sc_mmap_q))
    -		SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
    -
     	sc->sc_mmap_count = 0;
     }
     
    @@ -2498,6 +2512,57 @@ uvideo_vs_decode_stream_header_isight(st
     	}
     }
     
    +static inline int
    +uvideo_mmap_queue_op(struct uvideo_mmap_queue_item *items,
    +    unsigned long *seq, int dequeue, int idx) {
    +	long diff;
    +	unsigned long s;
    +	struct uvideo_mmap_queue_item *item;
    +
    +	for (;;) {
    +		s = atomic_load_long(seq);
    +		item = &items[(int)(s % UVIDEO_MMAP_QUEUE_SIZE)];
    +		diff = item->qi_seq - (s + dequeue);
    +
    +		if (diff == 0) {
    +			/* item can be locked */
    +			if (atomic_cas_ulong(seq, s, s + 1) != s)
    +				continue;
    +
    +			/* it is locked */
    +
    +			if (dequeue) {
    +				idx = item->qi_idx;
    +				item->qi_seq = s + UVIDEO_MMAP_QUEUE_SIZE;
    +				return idx;
    +			} else {
    +				item->qi_idx = idx;
    +				item->qi_seq++;
    +				return 0;
    +			}
    +		}
    +
    +		if (diff < 0) {
    +			/* queue is full */
    +			return -1;
    +		}
    +
    +		/* diff > 0 - means another thread moved sequence */
    +	}
    +}
    +
    +int
    +uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *q)
    +{
    +	return uvideo_mmap_queue_op(q->q_items, &q->q_tail_seq, 1, -1);
    +}
    +
    +int
    +uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *q, int idx)
    +{
    +	return uvideo_mmap_queue_op(q->q_items, &q->q_head_seq, 0, idx);
    +}
    +
     void
     uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err)
     {
    @@ -2507,11 +2572,9 @@ uvideo_mmap_queue(struct uvideo_softc *s
     		panic("%s: mmap buffers not allocated", __func__);
     
     	/* find a buffer which is ready for queueing */
    -	for (i = 0; i < sc->sc_mmap_count; i++) {
    -		if (sc->sc_mmap[i].v4l2_buf.flags & V4L2_BUF_FLAG_QUEUED)
    -			break;
    -	}
    -	if (i == sc->sc_mmap_count) {
    +	i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_in);
    +
    +	if (i < 0) {
     		DPRINTF(1, "%s: %s: mmap queue is full!\n",
     		    DEVNAME(sc), __func__);
     		return;
    @@ -2537,7 +2600,10 @@ uvideo_mmap_queue(struct uvideo_softc *s
     	/* queue it */
     	sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE;
     	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED;
    -	SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames);
    +
    +	if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_out, i))
    +		panic("%s: mmap queue overflow", __func__);
    +
     	DPRINTF(2, "%s: %s: frame queued on index %d\n",
     	    DEVNAME(sc), __func__, i);
     
    @@ -3553,6 +3619,20 @@ uvideo_reqbufs(void *v, struct v4l2_requ
     		    sc->sc_mmap[i].v4l2_buf.length);
     	}
     
    +	/* reset queues */
    +	for (i = 0; i < UVIDEO_MMAP_QUEUE_SIZE; i++) {
    +		sc->sc_mmap_q_in.q_items[i].qi_seq = i;
    +		sc->sc_mmap_q_in.q_items[i].qi_idx = -1;
    +
    +		sc->sc_mmap_q_out.q_items[i].qi_seq = i;
    +		sc->sc_mmap_q_out.q_items[i].qi_idx = -1;
    +	}
    +
    +	sc->sc_mmap_q_in.q_head_seq = 0;
    +	sc->sc_mmap_q_in.q_tail_seq = 0;
    +	sc->sc_mmap_q_out.q_head_seq = 0;
    +	sc->sc_mmap_q_out.q_tail_seq = 0;
    +
     	/* tell how many buffers we have really allocated */
     	rb->count = sc->sc_mmap_count;
     
    @@ -3586,6 +3666,7 @@ uvideo_querybuf(void *v, struct v4l2_buf
     int
     uvideo_qbuf(void *v, struct v4l2_buffer *qb)
     {
    +	int flags;
     	struct uvideo_softc *sc = v;
     
     	if (qb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
    @@ -3593,9 +3674,15 @@ uvideo_qbuf(void *v, struct v4l2_buffer 
     	    qb->index >= sc->sc_mmap_count)
     		return (EINVAL);
     
    +	flags = sc->sc_mmap[qb->index].v4l2_buf.flags;
     	sc->sc_mmap[qb->index].v4l2_buf.flags &= ~V4L2_BUF_FLAG_DONE;
     	sc->sc_mmap[qb->index].v4l2_buf.flags |= V4L2_BUF_FLAG_QUEUED;
     
    +	if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_in, qb->index)) {
    +		sc->sc_mmap[qb->index].v4l2_buf.flags = flags;
    +		return (EINVAL);
    +	}
    +
     	DPRINTF(2, "%s: %s: buffer on index %d ready for queueing\n",
     	    DEVNAME(sc), __func__, qb->index);
     
    @@ -3605,24 +3692,26 @@ uvideo_qbuf(void *v, struct v4l2_buffer 
     int
     uvideo_dqbuf(void *v, struct v4l2_buffer *dqb)
     {
    -	struct uvideo_softc *sc = v;
     	struct uvideo_mmap *mmap;
    -	int error;
    +	struct uvideo_softc *sc = v;
    +	int error, i;
     
     	if (dqb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
     	    dqb->memory != V4L2_MEMORY_MMAP)
     		return (EINVAL);
     
    -	if (SIMPLEQ_EMPTY(&sc->sc_mmap_q)) {
    +again:
    +	i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_out);
    +
    +	if (i < 0) {
     		/* mmap queue is empty, block until first frame is queued */
     		error = tsleep_nsec(sc, 0, "vid_mmap", SEC_TO_NSEC(10));
     		if (error)
     			return (EINVAL);
    +		goto again;
     	}
     
    -	mmap = SIMPLEQ_FIRST(&sc->sc_mmap_q);
    -	if (mmap == NULL)
    -		panic("uvideo_dqbuf: NULL pointer!");
    +	mmap = &sc->sc_mmap[i];
     
     	bcopy(&mmap->v4l2_buf, dqb, sizeof(struct v4l2_buffer));
     
    @@ -3631,7 +3720,6 @@ uvideo_dqbuf(void *v, struct v4l2_buffer
     
     	DPRINTF(2, "%s: %s: frame dequeued from index %d\n",
     	    DEVNAME(sc), __func__, mmap->v4l2_buf.index);
    -	SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
     
     	return (0);
     }
    Index: sys/dev/usb/uvideo.h
    ===================================================================
    RCS file: /home/cvs/src/sys/dev/usb/uvideo.h,v
    diff -u -p -r1.64 uvideo.h
    --- sys/dev/usb/uvideo.h	26 Feb 2025 20:50:46 -0000	1.64
    +++ sys/dev/usb/uvideo.h	14 Mar 2025 12:16:09 -0000
    @@ -553,11 +553,9 @@ struct uvideo_frame_buffer {
      */
     #define UVIDEO_MAX_BUFFERS	8
     struct uvideo_mmap {
    -	SIMPLEQ_ENTRY(uvideo_mmap)	q_frames;
     	uint8_t				*buf;
     	struct v4l2_buffer		 v4l2_buf;
     };
    -typedef SIMPLEQ_HEAD(, uvideo_mmap) q_mmap;
     
     struct uvideo_format_group {
     	uint32_t				 pixelformat;
    
    
  • Kirill A. Korinsky:

    sys/uvideo: avoid using queue.h