Index | Thread | Search

From:
Marcus Glocker <marcus@nazgul.ch>
Subject:
Re: sys/uvideo: avoid using queue.h
To:
"Kirill A. Korinsky" <kirill@korins.ky>
Cc:
tech@openbsd.org
Date:
Sun, 16 Mar 2025 08:24:29 +0100

Download raw body.

Thread
On Fri, Mar 14, 2025 at 02:16:23PM GMT, Kirill A. Korinsky wrote:

> On Fri, 14 Mar 2025 14:10:01 +0100,
> Kirill A. Korinsky <kirill@korins.ky> wrote:
> > 
> > On Thu, 06 Mar 2025 09:36:46 +0100,
> > Kirill A. Korinsky <kirill@korins.ky> wrote:
> > >
> > > tech@,
> > >
> > > macroses from queue.h doesn't thread safe and wraping it by mutex on
> > > uvideo use case seems isn't so good idea.
> > >
> > > It has a hardcoded number of used buffers as 8 and already has a code
> > > which iterates against them to find the buffer which is ready for
> > > queueing in uvideo_mmap_getbuf.
> > >
> > > Here I'd like to replace SIMPLEQ_* by similar logic.
> > >
> > > It fixes two bugs:
> > >
> > > 1. If isochronous webcam sends two frames in one transfer,
> > > SIMPLEQ_INSERT_TAIL adds the same element twice and creates infinity
> > > loop inside sc_mmap_q which will be iterating in uvideo_vs_free_frame.
> > >
> > > 2. Prevents from calling SIMPLEQ_INSERT_TAIL and SIMPLEQ_REMOVE_HEAD for
> > > the same element in the queue which may lead to unexpected state.
> > >
> > > This diff was suceffuly tested on:
> > >  - Elgato Facecam Pro (isochronous)
> > >  - Jabra PanaCast (bulk)
> > >
> > 
> > Here an alternative approach where I had implemented Multi Producer Multi
> > Consumer, wait-free and fixed size FIFO queue which is thread safe.
> > 
> > I had replaced both for-loop to it, and it works.
> > 
> > It can be done faster by replacing % (reminder) by mask but it requires to
> > change UVIDEO_MAX_BUFFERS to something in form 2^x - 1.
> > 
> > Use 7 as UVIDEO_MAX_BUFFERS is safe in term of memory usage, but at least
> > ffmpeg had 8 as hardcoded constant which changes its behaviour, if I recall
> > rigth.
> > 
> > Put 15 should be safe as well, but 4K frames allocates ~16 Mb memory and it
> > turns to ~240Mb for mmap memory. It should be fine for amd64 and arm64, but
> > I have no idea about other architectures.
> > 
> > So, here reminder.
> > 
> > Regarding the queue algorithm itself.
> > 
> > I don't know who invented it on the first place, but this is a version which
> > I had invented independently in 2016 or 2017 for some large Java app, and
> > which is uses it under load for years without any knwon to me issues.
> > 
> > So, it's fine to be licensed under ISC and as far as I know it isn't
> > patented, but I haven't done any patent search.
> > 
> > But it is quite trivial, and I assume that it is well known algorithm
> > for someone.
> > 
> > Any test and feedback is welcome.
> >
> 
> Ooops, I had sent a wrong diff. Here the right one.

I'm not familiar with this specific queuing algorithm, but it seems to
work :-)

I gave it a spin, and I can see that FIFO is working, and that it always
iterates through all the buffers, also with lower frame rates.  This is
in line with the behavior of the Linux driver, and is also an improvement
compared to the current SIMPLEQ implementation, which is only consuming
buffer zero, mostly.

I will need to do some more testing.

Some initial in-line comments.
 
> Index: sys/dev/usb/uvideo.c
> ===================================================================
> RCS file: /home/cvs/src/sys/dev/usb/uvideo.c,v
> diff -u -p -r1.252 uvideo.c
> --- sys/dev/usb/uvideo.c	12 Mar 2025 14:08:51 -0000	1.252
> +++ sys/dev/usb/uvideo.c	14 Mar 2025 13:11:08 -0000
> @@ -24,6 +24,7 @@
>  #include <sys/stat.h>
>  #include <sys/kthread.h>
>  #include <sys/stdint.h>
> +#include <sys/atomic.h>
>  
>  #include <uvm/uvm_extern.h>
>  
> @@ -50,6 +51,20 @@ int uvideo_debug = 1;
>  #define byteof(x) ((x) >> 3)
>  #define bitof(x)  (1L << ((x) & 0x7))
>  
> +/* queue needs at least one empty element, to avoid inifity loop */

"infinity".

> +#define UVIDEO_MMAP_QUEUE_SIZE	(UVIDEO_MAX_BUFFERS + 1)
> +
> +struct uvideo_mmap_queue_item {
> +	unsigned long				 qi_seq;
> +	int					 qi_idx;
> +};
> +
> +struct uvideo_mmap_queue {
> +	struct uvideo_mmap_queue_item		 q_items[UVIDEO_MMAP_QUEUE_SIZE];

Line break, maybe remove the additional space indentations for these new
structures.

> +	unsigned long				 q_head_seq;
> +	unsigned long				 q_tail_seq;
> +};
> +
>  struct uvideo_softc {
>  	struct device				 sc_dev;
>  	struct usbd_device			*sc_udev;
> @@ -68,10 +83,12 @@ struct uvideo_softc {
>  	struct uvideo_mmap			 sc_mmap[UVIDEO_MAX_BUFFERS];
>  	uint8_t					*sc_mmap_buffer;
>  	size_t					 sc_mmap_buffer_size;
> -	q_mmap					 sc_mmap_q;
>  	int					 sc_mmap_count;
>  	int					 sc_mmap_flag;
>  
> +	struct uvideo_mmap_queue		 sc_mmap_q_in;
> +	struct uvideo_mmap_queue		 sc_mmap_q_out;
> +
>  	struct vnode				*sc_vp;
>  	struct usb_task				 sc_task_write;
>  
> @@ -173,6 +190,8 @@ void		uvideo_vs_decode_stream_header(str
>  void		uvideo_vs_decode_stream_header_isight(struct uvideo_softc *,
>  		    uint8_t *, int);
>  void		uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int);
> +int		uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *);
> +int		uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *, int);
>  void		uvideo_read(struct uvideo_softc *, uint8_t *, int);
>  usbd_status	uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t,
>  		    uint16_t, uint8_t *, size_t);
> @@ -669,8 +688,6 @@ uvideo_attach_hook(struct device *self)
>  	if (error != USBD_NORMAL_COMPLETION)
>  		return;
>  
> -	/* init mmap queue */
> -	SIMPLEQ_INIT(&sc->sc_mmap_q);
>  	sc->sc_mmap_count = 0;
>  
>  	DPRINTF(1, "uvideo_attach: doing video_attach_mi\n");
> @@ -1960,9 +1977,6 @@ uvideo_vs_free_frame(struct uvideo_softc
>  		sc->sc_mmap_buffer_size = 0;
>  	}
>  
> -	while (!SIMPLEQ_EMPTY(&sc->sc_mmap_q))
> -		SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
> -
>  	sc->sc_mmap_count = 0;
>  }
>  
> @@ -2498,6 +2512,57 @@ uvideo_vs_decode_stream_header_isight(st
>  	}
>  }
>  
> +static inline int
> +uvideo_mmap_queue_op(struct uvideo_mmap_queue_item *items,
> +    unsigned long *seq, int dequeue, int idx) {
> +	long diff;
> +	unsigned long s;
> +	struct uvideo_mmap_queue_item *item;
> +
> +	for (;;) {
> +		s = atomic_load_long(seq);
> +		item = &items[(int)(s % UVIDEO_MMAP_QUEUE_SIZE)];
> +		diff = item->qi_seq - (s + dequeue);
> +
> +		if (diff == 0) {
> +			/* item can be locked */
> +			if (atomic_cas_ulong(seq, s, s + 1) != s)
> +				continue;
> +
> +			/* it is locked */
> +
> +			if (dequeue) {
> +				idx = item->qi_idx;
> +				item->qi_seq = s + UVIDEO_MMAP_QUEUE_SIZE;
> +				return idx;
> +			} else {
> +				item->qi_idx = idx;
> +				item->qi_seq++;
> +				return 0;
> +			}
> +		}
> +
> +		if (diff < 0) {
> +			/* queue is full */
> +			return -1;
> +		}
> +
> +		/* diff > 0 - means another thread moved sequence */
> +	}
> +}
> +
> +int
> +uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *q)
> +{
> +	return uvideo_mmap_queue_op(q->q_items, &q->q_tail_seq, 1, -1);
> +}
> +
> +int
> +uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *q, int idx)
> +{
> +	return uvideo_mmap_queue_op(q->q_items, &q->q_head_seq, 0, idx);
> +}
> +
>  void
>  uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err)
>  {
> @@ -2507,11 +2572,9 @@ uvideo_mmap_queue(struct uvideo_softc *s
>  		panic("%s: mmap buffers not allocated", __func__);
>  
>  	/* find a buffer which is ready for queueing */
> -	for (i = 0; i < sc->sc_mmap_count; i++) {
> -		if (sc->sc_mmap[i].v4l2_buf.flags & V4L2_BUF_FLAG_QUEUED)
> -			break;
> -	}
> -	if (i == sc->sc_mmap_count) {
> +	i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_in);
> +
> +	if (i < 0) {
>  		DPRINTF(1, "%s: %s: mmap queue is full!\n",
>  		    DEVNAME(sc), __func__);
>  		return;
> @@ -2537,7 +2600,10 @@ uvideo_mmap_queue(struct uvideo_softc *s
>  	/* queue it */
>  	sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE;
>  	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED;
> -	SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames);
> +
> +	if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_out, i))
> +		panic("%s: mmap queue overflow", __func__);
> +
>  	DPRINTF(2, "%s: %s: frame queued on index %d\n",
>  	    DEVNAME(sc), __func__, i);
>  
> @@ -3553,6 +3619,20 @@ uvideo_reqbufs(void *v, struct v4l2_requ
>  		    sc->sc_mmap[i].v4l2_buf.length);
>  	}
>  
> +	/* reset queues */
> +	for (i = 0; i < UVIDEO_MMAP_QUEUE_SIZE; i++) {
> +		sc->sc_mmap_q_in.q_items[i].qi_seq = i;
> +		sc->sc_mmap_q_in.q_items[i].qi_idx = -1;
> +
> +		sc->sc_mmap_q_out.q_items[i].qi_seq = i;
> +		sc->sc_mmap_q_out.q_items[i].qi_idx = -1;
> +	}
> +
> +	sc->sc_mmap_q_in.q_head_seq = 0;
> +	sc->sc_mmap_q_in.q_tail_seq = 0;
> +	sc->sc_mmap_q_out.q_head_seq = 0;
> +	sc->sc_mmap_q_out.q_tail_seq = 0;
> +
>  	/* tell how many buffers we have really allocated */
>  	rb->count = sc->sc_mmap_count;
>  
> @@ -3586,6 +3666,7 @@ uvideo_querybuf(void *v, struct v4l2_buf
>  int
>  uvideo_qbuf(void *v, struct v4l2_buffer *qb)
>  {
> +	int flags;
>  	struct uvideo_softc *sc = v;
>  
>  	if (qb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
> @@ -3593,9 +3674,15 @@ uvideo_qbuf(void *v, struct v4l2_buffer 
>  	    qb->index >= sc->sc_mmap_count)
>  		return (EINVAL);
>  
> +	flags = sc->sc_mmap[qb->index].v4l2_buf.flags;
>  	sc->sc_mmap[qb->index].v4l2_buf.flags &= ~V4L2_BUF_FLAG_DONE;
>  	sc->sc_mmap[qb->index].v4l2_buf.flags |= V4L2_BUF_FLAG_QUEUED;
>  
> +	if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_in, qb->index)) {
> +		sc->sc_mmap[qb->index].v4l2_buf.flags = flags;
> +		return (EINVAL);
> +	}
> +
>  	DPRINTF(2, "%s: %s: buffer on index %d ready for queueing\n",
>  	    DEVNAME(sc), __func__, qb->index);
>  
> @@ -3605,24 +3692,26 @@ uvideo_qbuf(void *v, struct v4l2_buffer 
>  int
>  uvideo_dqbuf(void *v, struct v4l2_buffer *dqb)
>  {
> -	struct uvideo_softc *sc = v;
>  	struct uvideo_mmap *mmap;
> -	int error;
> +	struct uvideo_softc *sc = v;
> +	int error, i;
>  
>  	if (dqb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
>  	    dqb->memory != V4L2_MEMORY_MMAP)
>  		return (EINVAL);
>  
> -	if (SIMPLEQ_EMPTY(&sc->sc_mmap_q)) {
> +again:
> +	i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_out);
> +
> +	if (i < 0) {
>  		/* mmap queue is empty, block until first frame is queued */
>  		error = tsleep_nsec(sc, 0, "vid_mmap", SEC_TO_NSEC(10));
>  		if (error)
>  			return (EINVAL);
> +		goto again;
>  	}
>  
> -	mmap = SIMPLEQ_FIRST(&sc->sc_mmap_q);
> -	if (mmap == NULL)
> -		panic("uvideo_dqbuf: NULL pointer!");
> +	mmap = &sc->sc_mmap[i];
>  
>  	bcopy(&mmap->v4l2_buf, dqb, sizeof(struct v4l2_buffer));
>  
> @@ -3631,7 +3720,6 @@ uvideo_dqbuf(void *v, struct v4l2_buffer
>  
>  	DPRINTF(2, "%s: %s: frame dequeued from index %d\n",
>  	    DEVNAME(sc), __func__, mmap->v4l2_buf.index);
> -	SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
>  
>  	return (0);
>  }
> Index: sys/dev/usb/uvideo.h
> ===================================================================
> RCS file: /home/cvs/src/sys/dev/usb/uvideo.h,v
> diff -u -p -r1.64 uvideo.h
> --- sys/dev/usb/uvideo.h	26 Feb 2025 20:50:46 -0000	1.64
> +++ sys/dev/usb/uvideo.h	14 Mar 2025 12:16:09 -0000
> @@ -553,11 +553,9 @@ struct uvideo_frame_buffer {
>   */
>  #define UVIDEO_MAX_BUFFERS	8
>  struct uvideo_mmap {
> -	SIMPLEQ_ENTRY(uvideo_mmap)	q_frames;
>  	uint8_t				*buf;
>  	struct v4l2_buffer		 v4l2_buf;
>  };
> -typedef SIMPLEQ_HEAD(, uvideo_mmap) q_mmap;
>  
>  struct uvideo_format_group {
>  	uint32_t				 pixelformat;

#include <sys/queue.h> should be removed as well.