From: Kirill A. Korinsky Subject: Re: sys/uvideo: avoid using queue.h To: Marcus Glocker Cc: tech@openbsd.org Date: Sun, 16 Mar 2025 11:10:22 +0100 On Sun, 16 Mar 2025 08:24:29 +0100, Marcus Glocker wrote: > > On Fri, Mar 14, 2025 at 02:16:23PM GMT, Kirill A. Korinsky wrote: > > > On Fri, 14 Mar 2025 14:10:01 +0100, > > Kirill A. Korinsky wrote: > > > > > > On Thu, 06 Mar 2025 09:36:46 +0100, > > > Kirill A. Korinsky wrote: > > > > > > > > tech@, > > > > > > > > macroses from queue.h doesn't thread safe and wraping it by mutex on > > > > uvideo use case seems isn't so good idea. > > > > > > > > It has a hardcoded number of used buffers as 8 and already has a code > > > > which iterates against them to find the buffer which is ready for > > > > queueing in uvideo_mmap_getbuf. > > > > > > > > Here I'd like to replace SIMPLEQ_* by similar logic. > > > > > > > > It fixes two bugs: > > > > > > > > 1. If isochronous webcam sends two frames in one transfer, > > > > SIMPLEQ_INSERT_TAIL adds the same element twice and creates infinity > > > > loop inside sc_mmap_q which will be iterating in uvideo_vs_free_frame. > > > > > > > > 2. Prevents from calling SIMPLEQ_INSERT_TAIL and SIMPLEQ_REMOVE_HEAD for > > > > the same element in the queue which may lead to unexpected state. > > > > > > > > This diff was suceffuly tested on: > > > > - Elgato Facecam Pro (isochronous) > > > > - Jabra PanaCast (bulk) > > > > > > > > > > Here an alternative approach where I had implemented Multi Producer Multi > > > Consumer, wait-free and fixed size FIFO queue which is thread safe. > > > > > > I had replaced both for-loop to it, and it works. > > > > > > It can be done faster by replacing % (reminder) by mask but it requires to > > > change UVIDEO_MAX_BUFFERS to something in form 2^x - 1. > > > > > > Use 7 as UVIDEO_MAX_BUFFERS is safe in term of memory usage, but at least > > > ffmpeg had 8 as hardcoded constant which changes its behaviour, if I recall > > > rigth. > > > > > > Put 15 should be safe as well, but 4K frames allocates ~16 Mb memory and it > > > turns to ~240Mb for mmap memory. It should be fine for amd64 and arm64, but > > > I have no idea about other architectures. > > > > > > So, here reminder. > > > > > > Regarding the queue algorithm itself. > > > > > > I don't know who invented it on the first place, but this is a version which > > > I had invented independently in 2016 or 2017 for some large Java app, and > > > which is uses it under load for years without any knwon to me issues. > > > > > > So, it's fine to be licensed under ISC and as far as I know it isn't > > > patented, but I haven't done any patent search. > > > > > > But it is quite trivial, and I assume that it is well known algorithm > > > for someone. > > > > > > Any test and feedback is welcome. > > > > > > > Ooops, I had sent a wrong diff. Here the right one. > > I'm not familiar with this specific queuing algorithm, but it seems to > work :-) > Yeah, and it is quite obvious, just a two counters. > I gave it a spin, and I can see that FIFO is working, and that it always > iterates through all the buffers, also with lower frame rates. This is > in line with the behavior of the Linux driver, and is also an improvement > compared to the current SIMPLEQ implementation, which is only consuming > buffer zero, mostly. > > I will need to do some more testing. > Thanks! I, locally, test it as well with only https://marc.info/?l=openbsd-tech&m=174180759929893&w=2 Everything quite stable on my usecases. > Some initial in-line comments. > > > +#define UVIDEO_MMAP_QUEUE_SIZE (UVIDEO_MAX_BUFFERS + 1) > > + > > +struct uvideo_mmap_queue_item { > > + unsigned long qi_seq; > > + int qi_idx; > > +}; > > + > > +struct uvideo_mmap_queue { > > + struct uvideo_mmap_queue_item q_items[UVIDEO_MMAP_QUEUE_SIZE]; > > Line break, maybe remove the additional space indentations for these new > structures. > Or I may follow style of uvideo_softc and call it as q_item. At least fold(1) thinks it's ok. > > #include should be removed as well. Do you think it is worth to move FIFO queue into sys/queue.h? Anyway, here an updated diff: Index: sys/dev/usb/uvideo.c =================================================================== RCS file: /home/cvs/src/sys/dev/usb/uvideo.c,v diff -u -p -r1.252 uvideo.c --- sys/dev/usb/uvideo.c 12 Mar 2025 14:08:51 -0000 1.252 +++ sys/dev/usb/uvideo.c 16 Mar 2025 10:05:04 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -50,6 +51,20 @@ int uvideo_debug = 1; #define byteof(x) ((x) >> 3) #define bitof(x) (1L << ((x) & 0x7)) +/* queue needs at least one empty element, to avoid infinity loop */ +#define UVIDEO_MMAP_QUEUE_SIZE (UVIDEO_MAX_BUFFERS + 1) + +struct uvideo_mmap_queue_item { + unsigned long qi_seq; + int qi_idx; +}; + +struct uvideo_mmap_queue { + struct uvideo_mmap_queue_item q_item[UVIDEO_MMAP_QUEUE_SIZE]; + unsigned long q_head_seq; + unsigned long q_tail_seq; +}; + struct uvideo_softc { struct device sc_dev; struct usbd_device *sc_udev; @@ -68,10 +83,12 @@ struct uvideo_softc { struct uvideo_mmap sc_mmap[UVIDEO_MAX_BUFFERS]; uint8_t *sc_mmap_buffer; size_t sc_mmap_buffer_size; - q_mmap sc_mmap_q; int sc_mmap_count; int sc_mmap_flag; + struct uvideo_mmap_queue sc_mmap_q_in; + struct uvideo_mmap_queue sc_mmap_q_out; + struct vnode *sc_vp; struct usb_task sc_task_write; @@ -173,6 +190,8 @@ void uvideo_vs_decode_stream_header(str void uvideo_vs_decode_stream_header_isight(struct uvideo_softc *, uint8_t *, int); void uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int); +int uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *); +int uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *, int); void uvideo_read(struct uvideo_softc *, uint8_t *, int); usbd_status uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t, uint16_t, uint8_t *, size_t); @@ -669,8 +688,6 @@ uvideo_attach_hook(struct device *self) if (error != USBD_NORMAL_COMPLETION) return; - /* init mmap queue */ - SIMPLEQ_INIT(&sc->sc_mmap_q); sc->sc_mmap_count = 0; DPRINTF(1, "uvideo_attach: doing video_attach_mi\n"); @@ -1960,9 +1977,6 @@ uvideo_vs_free_frame(struct uvideo_softc sc->sc_mmap_buffer_size = 0; } - while (!SIMPLEQ_EMPTY(&sc->sc_mmap_q)) - SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames); - sc->sc_mmap_count = 0; } @@ -2498,6 +2512,57 @@ uvideo_vs_decode_stream_header_isight(st } } +static inline int +uvideo_mmap_queue_op(struct uvideo_mmap_queue_item *items, + unsigned long *seq, int dequeue, int idx) { + long diff; + unsigned long s; + struct uvideo_mmap_queue_item *item; + + for (;;) { + s = atomic_load_long(seq); + item = &items[(int)(s % UVIDEO_MMAP_QUEUE_SIZE)]; + diff = item->qi_seq - (s + dequeue); + + if (diff == 0) { + /* item can be locked */ + if (atomic_cas_ulong(seq, s, s + 1) != s) + continue; + + /* it is locked */ + + if (dequeue) { + idx = item->qi_idx; + item->qi_seq = s + UVIDEO_MMAP_QUEUE_SIZE; + return (idx); + } else { + item->qi_idx = idx; + item->qi_seq++; + return (0); + } + } + + if (diff < 0) { + /* queue is full */ + return (-1); + } + + /* diff > 0 - means another thread moved sequence */ + } +} + +int +uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *q) +{ + return uvideo_mmap_queue_op(q->q_item, &q->q_tail_seq, 1, -1); +} + +int +uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *q, int idx) +{ + return uvideo_mmap_queue_op(q->q_item, &q->q_head_seq, 0, idx); +} + void uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err) { @@ -2507,11 +2572,9 @@ uvideo_mmap_queue(struct uvideo_softc *s panic("%s: mmap buffers not allocated", __func__); /* find a buffer which is ready for queueing */ - for (i = 0; i < sc->sc_mmap_count; i++) { - if (sc->sc_mmap[i].v4l2_buf.flags & V4L2_BUF_FLAG_QUEUED) - break; - } - if (i == sc->sc_mmap_count) { + i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_in); + + if (i < 0) { DPRINTF(1, "%s: %s: mmap queue is full!\n", DEVNAME(sc), __func__); return; @@ -2537,7 +2600,10 @@ uvideo_mmap_queue(struct uvideo_softc *s /* queue it */ sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE; sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED; - SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames); + + if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_out, i)) + panic("%s: mmap queue overflow", __func__); + DPRINTF(2, "%s: %s: frame queued on index %d\n", DEVNAME(sc), __func__, i); @@ -3553,6 +3619,20 @@ uvideo_reqbufs(void *v, struct v4l2_requ sc->sc_mmap[i].v4l2_buf.length); } + /* reset queues */ + for (i = 0; i < UVIDEO_MMAP_QUEUE_SIZE; i++) { + sc->sc_mmap_q_in.q_item[i].qi_seq = i; + sc->sc_mmap_q_in.q_item[i].qi_idx = -1; + + sc->sc_mmap_q_out.q_item[i].qi_seq = i; + sc->sc_mmap_q_out.q_item[i].qi_idx = -1; + } + + sc->sc_mmap_q_in.q_head_seq = 0; + sc->sc_mmap_q_in.q_tail_seq = 0; + sc->sc_mmap_q_out.q_head_seq = 0; + sc->sc_mmap_q_out.q_tail_seq = 0; + /* tell how many buffers we have really allocated */ rb->count = sc->sc_mmap_count; @@ -3586,6 +3666,7 @@ uvideo_querybuf(void *v, struct v4l2_buf int uvideo_qbuf(void *v, struct v4l2_buffer *qb) { + int flags; struct uvideo_softc *sc = v; if (qb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || @@ -3593,9 +3674,15 @@ uvideo_qbuf(void *v, struct v4l2_buffer qb->index >= sc->sc_mmap_count) return (EINVAL); + flags = sc->sc_mmap[qb->index].v4l2_buf.flags; sc->sc_mmap[qb->index].v4l2_buf.flags &= ~V4L2_BUF_FLAG_DONE; sc->sc_mmap[qb->index].v4l2_buf.flags |= V4L2_BUF_FLAG_QUEUED; + if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_in, qb->index)) { + sc->sc_mmap[qb->index].v4l2_buf.flags = flags; + return (EINVAL); + } + DPRINTF(2, "%s: %s: buffer on index %d ready for queueing\n", DEVNAME(sc), __func__, qb->index); @@ -3605,24 +3692,26 @@ uvideo_qbuf(void *v, struct v4l2_buffer int uvideo_dqbuf(void *v, struct v4l2_buffer *dqb) { - struct uvideo_softc *sc = v; struct uvideo_mmap *mmap; - int error; + struct uvideo_softc *sc = v; + int error, i; if (dqb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || dqb->memory != V4L2_MEMORY_MMAP) return (EINVAL); - if (SIMPLEQ_EMPTY(&sc->sc_mmap_q)) { +again: + i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_out); + + if (i < 0) { /* mmap queue is empty, block until first frame is queued */ error = tsleep_nsec(sc, 0, "vid_mmap", SEC_TO_NSEC(10)); if (error) return (EINVAL); + goto again; } - mmap = SIMPLEQ_FIRST(&sc->sc_mmap_q); - if (mmap == NULL) - panic("uvideo_dqbuf: NULL pointer!"); + mmap = &sc->sc_mmap[i]; bcopy(&mmap->v4l2_buf, dqb, sizeof(struct v4l2_buffer)); @@ -3631,7 +3720,6 @@ uvideo_dqbuf(void *v, struct v4l2_buffer DPRINTF(2, "%s: %s: frame dequeued from index %d\n", DEVNAME(sc), __func__, mmap->v4l2_buf.index); - SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames); return (0); } Index: sys/dev/usb/uvideo.h =================================================================== RCS file: /home/cvs/src/sys/dev/usb/uvideo.h,v diff -u -p -r1.64 uvideo.h --- sys/dev/usb/uvideo.h 26 Feb 2025 20:50:46 -0000 1.64 +++ sys/dev/usb/uvideo.h 16 Mar 2025 10:00:52 -0000 @@ -17,7 +17,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include #include /* @@ -553,11 +552,9 @@ struct uvideo_frame_buffer { */ #define UVIDEO_MAX_BUFFERS 8 struct uvideo_mmap { - SIMPLEQ_ENTRY(uvideo_mmap) q_frames; uint8_t *buf; struct v4l2_buffer v4l2_buf; }; -typedef SIMPLEQ_HEAD(, uvideo_mmap) q_mmap; struct uvideo_format_group { uint32_t pixelformat;