From: Kirill A. Korinsky Subject: Re: sys/uvideo: avoid using queue.h To: tech@openbsd.org, mglocker@openbsd.org Date: Fri, 14 Mar 2025 14:10:01 +0100 On Thu, 06 Mar 2025 09:36:46 +0100, Kirill A. Korinsky wrote: > > tech@, > > macroses from queue.h doesn't thread safe and wraping it by mutex on > uvideo use case seems isn't so good idea. > > It has a hardcoded number of used buffers as 8 and already has a code > which iterates against them to find the buffer which is ready for > queueing in uvideo_mmap_getbuf. > > Here I'd like to replace SIMPLEQ_* by similar logic. > > It fixes two bugs: > > 1. If isochronous webcam sends two frames in one transfer, > SIMPLEQ_INSERT_TAIL adds the same element twice and creates infinity > loop inside sc_mmap_q which will be iterating in uvideo_vs_free_frame. > > 2. Prevents from calling SIMPLEQ_INSERT_TAIL and SIMPLEQ_REMOVE_HEAD for > the same element in the queue which may lead to unexpected state. > > This diff was suceffuly tested on: > - Elgato Facecam Pro (isochronous) > - Jabra PanaCast (bulk) > Here an alternative approach where I had implemented Multi Producer Multi Consumer, wait-free and fixed size FIFO queue which is thread safe. I had replaced both for-loop to it, and it works. It can be done faster by replacing % (reminder) by mask but it requires to change UVIDEO_MAX_BUFFERS to something in form 2^x - 1. Use 7 as UVIDEO_MAX_BUFFERS is safe in term of memory usage, but at least ffmpeg had 8 as hardcoded constant which changes its behaviour, if I recall rigth. Put 15 should be safe as well, but 4K frames allocates ~16 Mb memory and it turns to ~240Mb for mmap memory. It should be fine for amd64 and arm64, but I have no idea about other architectures. So, here reminder. Regarding the queue algorithm itself. I don't know who invented it on the first place, but this is a version which I had invented independently in 2016 or 2017 for some large Java app, and which is uses it under load for years without any knwon to me issues. So, it's fine to be licensed under ISC and as far as I know it isn't patented, but I haven't done any patent search. But it is quite trivial, and I assume that it is well known algorithm for someone. Any test and feedback is welcome. Index: sys/dev/usb/uvideo.c =================================================================== RCS file: /home/cvs/src/sys/dev/usb/uvideo.c,v diff -u -p -r1.252 uvideo.c --- sys/dev/usb/uvideo.c 12 Mar 2025 14:08:51 -0000 1.252 +++ sys/dev/usb/uvideo.c 14 Mar 2025 13:04:39 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -50,6 +51,20 @@ int uvideo_debug = 1; #define byteof(x) ((x) >> 3) #define bitof(x) (1L << ((x) & 0x7)) +/* queue needs at least one empty element, to avoid inifity loop */ +#define UVIDEO_MMAP_QUEUE_SIZE (UVIDEO_MAX_BUFFERS + 1) + +struct uvideo_mmap_queue_item { + unsigned long qi_seq; + int qi_idx; +}; + +struct uvideo_mmap_queue { + struct uvideo_mmap_queue_item q_items[UVIDEO_MMAP_QUEUE_SIZE]; + unsigned long q_head_seq; + unsigned long q_tail_seq; +}; + struct uvideo_softc { struct device sc_dev; struct usbd_device *sc_udev; @@ -68,10 +83,12 @@ struct uvideo_softc { struct uvideo_mmap sc_mmap[UVIDEO_MAX_BUFFERS]; uint8_t *sc_mmap_buffer; size_t sc_mmap_buffer_size; - q_mmap sc_mmap_q; int sc_mmap_count; int sc_mmap_flag; + struct uvideo_mmap_queue sc_mmap_q_in; + struct uvideo_mmap_queue sc_mmap_q_out; + struct vnode *sc_vp; struct usb_task sc_task_write; @@ -173,6 +190,8 @@ void uvideo_vs_decode_stream_header(str void uvideo_vs_decode_stream_header_isight(struct uvideo_softc *, uint8_t *, int); void uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int); +int uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *); +int uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *, int); void uvideo_read(struct uvideo_softc *, uint8_t *, int); usbd_status uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t, uint16_t, uint8_t *, size_t); @@ -669,8 +688,6 @@ uvideo_attach_hook(struct device *self) if (error != USBD_NORMAL_COMPLETION) return; - /* init mmap queue */ - SIMPLEQ_INIT(&sc->sc_mmap_q); sc->sc_mmap_count = 0; DPRINTF(1, "uvideo_attach: doing video_attach_mi\n"); @@ -1960,9 +1977,6 @@ uvideo_vs_free_frame(struct uvideo_softc sc->sc_mmap_buffer_size = 0; } - while (!SIMPLEQ_EMPTY(&sc->sc_mmap_q)) - SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames); - sc->sc_mmap_count = 0; } @@ -2498,6 +2512,57 @@ uvideo_vs_decode_stream_header_isight(st } } +static inline int +uvideo_mmap_queue_op(struct uvideo_mmap_queue_item *items, + unsigned long *seq, int dequeue, int idx) { + long diff; + unsigned long s; + struct uvideo_mmap_queue_item *item; + + for (;;) { + s = atomic_load_long(seq); + item = &items[(int)(s % UVIDEO_MMAP_QUEUE_SIZE)]; + diff = item->qi_seq - (s + dequeue); + + if (diff == 0) { + /* item can be locked */ + if (atomic_cas_ulong(seq, s, s + 1) != s) + continue; + + /* it is locked */ + + if (dequeue) { + idx = item->qi_idx; + item->qi_seq = s + UVIDEO_MMAP_QUEUE_SIZE; + return idx; + } else { + item->qi_idx = idx; + item->qi_seq++; + return 0; + } + } + + if (diff < 0) { + /* queue is full */ + return -1; + } + + /* diff > 0 - means another thread moved sequence */ + } +} + +int +uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *q) +{ + return uvideo_mmap_queue_op(q->q_items, &q->q_tail_seq, 1, -1); +} + +int +uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *q, int idx) +{ + return uvideo_mmap_queue_op(q->q_items, &q->q_head_seq, 0, idx); +} + void uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err) { @@ -2507,11 +2572,9 @@ uvideo_mmap_queue(struct uvideo_softc *s panic("%s: mmap buffers not allocated", __func__); /* find a buffer which is ready for queueing */ - for (i = 0; i < sc->sc_mmap_count; i++) { - if (sc->sc_mmap[i].v4l2_buf.flags & V4L2_BUF_FLAG_QUEUED) - break; - } - if (i == sc->sc_mmap_count) { + i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_in); + + if (i < 0) { DPRINTF(1, "%s: %s: mmap queue is full!\n", DEVNAME(sc), __func__); return; @@ -2537,7 +2600,10 @@ uvideo_mmap_queue(struct uvideo_softc *s /* queue it */ sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE; sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED; - SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames); + + if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_out, i)) + panic("%s: mmap queue overflow", __func__); + DPRINTF(2, "%s: %s: frame queued on index %d\n", DEVNAME(sc), __func__, i); @@ -3553,6 +3619,20 @@ uvideo_reqbufs(void *v, struct v4l2_requ sc->sc_mmap[i].v4l2_buf.length); } + /* reset queues */ + for (i = 0; i < UVIDEO_MMAP_QUEUE_SIZE; i++) { + sc->sc_mmap_q_in.q_items[i].qi_seq = i; + sc->sc_mmap_q_in.q_items[i].qi_idx = -1; + + sc->sc_mmap_q_out.q_items[i].qi_seq = i; + sc->sc_mmap_q_out.q_items[i].qi_idx = -1; + } + + sc->sc_mmap_q_in.q_head_seq = 0; + sc->sc_mmap_q_in.q_tail_seq = 0; + sc->sc_mmap_q_out.q_head_seq = 0; + sc->sc_mmap_q_out.q_tail_seq = 0; + /* tell how many buffers we have really allocated */ rb->count = sc->sc_mmap_count; @@ -3586,6 +3666,7 @@ uvideo_querybuf(void *v, struct v4l2_buf int uvideo_qbuf(void *v, struct v4l2_buffer *qb) { + int flags; struct uvideo_softc *sc = v; if (qb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || @@ -3593,9 +3674,15 @@ uvideo_qbuf(void *v, struct v4l2_buffer qb->index >= sc->sc_mmap_count) return (EINVAL); + flags = sc->sc_mmap[qb->index].v4l2_buf.flags; sc->sc_mmap[qb->index].v4l2_buf.flags &= ~V4L2_BUF_FLAG_DONE; sc->sc_mmap[qb->index].v4l2_buf.flags |= V4L2_BUF_FLAG_QUEUED; + if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_in, qb->index)) { + sc->sc_mmap[qb->index].v4l2_buf.flags = flags; + return (EINVAL); + } + DPRINTF(2, "%s: %s: buffer on index %d ready for queueing\n", DEVNAME(sc), __func__, qb->index); @@ -3605,24 +3692,26 @@ uvideo_qbuf(void *v, struct v4l2_buffer int uvideo_dqbuf(void *v, struct v4l2_buffer *dqb) { - struct uvideo_softc *sc = v; struct uvideo_mmap *mmap; - int error; + struct uvideo_softc *sc = v; + int error, i; if (dqb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || dqb->memory != V4L2_MEMORY_MMAP) return (EINVAL); - if (SIMPLEQ_EMPTY(&sc->sc_mmap_q)) { +again: + i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_out); + + if (i < 0) { /* mmap queue is empty, block until first frame is queued */ error = tsleep_nsec(sc, 0, "vid_mmap", SEC_TO_NSEC(10)); if (error) return (EINVAL); + goto again; } - mmap = SIMPLEQ_FIRST(&sc->sc_mmap_q); - if (mmap == NULL) - panic("uvideo_dqbuf: NULL pointer!"); + mmap = (&sc->sc_mmap[i]; bcopy(&mmap->v4l2_buf, dqb, sizeof(struct v4l2_buffer)); @@ -3631,7 +3720,6 @@ uvideo_dqbuf(void *v, struct v4l2_buffer DPRINTF(2, "%s: %s: frame dequeued from index %d\n", DEVNAME(sc), __func__, mmap->v4l2_buf.index); - SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames); return (0); } Index: sys/dev/usb/uvideo.h =================================================================== RCS file: /home/cvs/src/sys/dev/usb/uvideo.h,v diff -u -p -r1.64 uvideo.h --- sys/dev/usb/uvideo.h 26 Feb 2025 20:50:46 -0000 1.64 +++ sys/dev/usb/uvideo.h 14 Mar 2025 12:16:09 -0000 @@ -553,11 +553,9 @@ struct uvideo_frame_buffer { */ #define UVIDEO_MAX_BUFFERS 8 struct uvideo_mmap { - SIMPLEQ_ENTRY(uvideo_mmap) q_frames; uint8_t *buf; struct v4l2_buffer v4l2_buf; }; -typedef SIMPLEQ_HEAD(, uvideo_mmap) q_mmap; struct uvideo_format_group { uint32_t pixelformat;