Download raw body.
sys/uvideo: avoid using queue.h
On Sun, 16 Mar 2025 08:24:29 +0100,
Marcus Glocker <marcus@nazgul.ch> wrote:
>
> On Fri, Mar 14, 2025 at 02:16:23PM GMT, Kirill A. Korinsky wrote:
>
> > On Fri, 14 Mar 2025 14:10:01 +0100,
> > Kirill A. Korinsky <kirill@korins.ky> wrote:
> > >
> > > On Thu, 06 Mar 2025 09:36:46 +0100,
> > > Kirill A. Korinsky <kirill@korins.ky> wrote:
> > > >
> > > > tech@,
> > > >
> > > > macroses from queue.h doesn't thread safe and wraping it by mutex on
> > > > uvideo use case seems isn't so good idea.
> > > >
> > > > It has a hardcoded number of used buffers as 8 and already has a code
> > > > which iterates against them to find the buffer which is ready for
> > > > queueing in uvideo_mmap_getbuf.
> > > >
> > > > Here I'd like to replace SIMPLEQ_* by similar logic.
> > > >
> > > > It fixes two bugs:
> > > >
> > > > 1. If isochronous webcam sends two frames in one transfer,
> > > > SIMPLEQ_INSERT_TAIL adds the same element twice and creates infinity
> > > > loop inside sc_mmap_q which will be iterating in uvideo_vs_free_frame.
> > > >
> > > > 2. Prevents from calling SIMPLEQ_INSERT_TAIL and SIMPLEQ_REMOVE_HEAD for
> > > > the same element in the queue which may lead to unexpected state.
> > > >
> > > > This diff was suceffuly tested on:
> > > > - Elgato Facecam Pro (isochronous)
> > > > - Jabra PanaCast (bulk)
> > > >
> > >
> > > Here an alternative approach where I had implemented Multi Producer Multi
> > > Consumer, wait-free and fixed size FIFO queue which is thread safe.
> > >
> > > I had replaced both for-loop to it, and it works.
> > >
> > > It can be done faster by replacing % (reminder) by mask but it requires to
> > > change UVIDEO_MAX_BUFFERS to something in form 2^x - 1.
> > >
> > > Use 7 as UVIDEO_MAX_BUFFERS is safe in term of memory usage, but at least
> > > ffmpeg had 8 as hardcoded constant which changes its behaviour, if I recall
> > > rigth.
> > >
> > > Put 15 should be safe as well, but 4K frames allocates ~16 Mb memory and it
> > > turns to ~240Mb for mmap memory. It should be fine for amd64 and arm64, but
> > > I have no idea about other architectures.
> > >
> > > So, here reminder.
> > >
> > > Regarding the queue algorithm itself.
> > >
> > > I don't know who invented it on the first place, but this is a version which
> > > I had invented independently in 2016 or 2017 for some large Java app, and
> > > which is uses it under load for years without any knwon to me issues.
> > >
> > > So, it's fine to be licensed under ISC and as far as I know it isn't
> > > patented, but I haven't done any patent search.
> > >
> > > But it is quite trivial, and I assume that it is well known algorithm
> > > for someone.
> > >
> > > Any test and feedback is welcome.
> > >
> >
> > Ooops, I had sent a wrong diff. Here the right one.
>
> I'm not familiar with this specific queuing algorithm, but it seems to
> work :-)
>
Yeah, and it is quite obvious, just a two counters.
> I gave it a spin, and I can see that FIFO is working, and that it always
> iterates through all the buffers, also with lower frame rates. This is
> in line with the behavior of the Linux driver, and is also an improvement
> compared to the current SIMPLEQ implementation, which is only consuming
> buffer zero, mostly.
>
> I will need to do some more testing.
>
Thanks! I, locally, test it as well with only
https://marc.info/?l=openbsd-tech&m=174180759929893&w=2
Everything quite stable on my usecases.
> Some initial in-line comments.
>
> > +#define UVIDEO_MMAP_QUEUE_SIZE (UVIDEO_MAX_BUFFERS + 1)
> > +
> > +struct uvideo_mmap_queue_item {
> > + unsigned long qi_seq;
> > + int qi_idx;
> > +};
> > +
> > +struct uvideo_mmap_queue {
> > + struct uvideo_mmap_queue_item q_items[UVIDEO_MMAP_QUEUE_SIZE];
>
> Line break, maybe remove the additional space indentations for these new
> structures.
>
Or I may follow style of uvideo_softc and call it as q_item.
At least fold(1) thinks it's ok.
>
> #include <sys/queue.h> should be removed as well.
Do you think it is worth to move FIFO queue into sys/queue.h?
Anyway, here an updated diff:
Index: sys/dev/usb/uvideo.c
===================================================================
RCS file: /home/cvs/src/sys/dev/usb/uvideo.c,v
diff -u -p -r1.252 uvideo.c
--- sys/dev/usb/uvideo.c 12 Mar 2025 14:08:51 -0000 1.252
+++ sys/dev/usb/uvideo.c 16 Mar 2025 10:05:04 -0000
@@ -24,6 +24,7 @@
#include <sys/stat.h>
#include <sys/kthread.h>
#include <sys/stdint.h>
+#include <sys/atomic.h>
#include <uvm/uvm_extern.h>
@@ -50,6 +51,20 @@ int uvideo_debug = 1;
#define byteof(x) ((x) >> 3)
#define bitof(x) (1L << ((x) & 0x7))
+/* queue needs at least one empty element, to avoid infinity loop */
+#define UVIDEO_MMAP_QUEUE_SIZE (UVIDEO_MAX_BUFFERS + 1)
+
+struct uvideo_mmap_queue_item {
+ unsigned long qi_seq;
+ int qi_idx;
+};
+
+struct uvideo_mmap_queue {
+ struct uvideo_mmap_queue_item q_item[UVIDEO_MMAP_QUEUE_SIZE];
+ unsigned long q_head_seq;
+ unsigned long q_tail_seq;
+};
+
struct uvideo_softc {
struct device sc_dev;
struct usbd_device *sc_udev;
@@ -68,10 +83,12 @@ struct uvideo_softc {
struct uvideo_mmap sc_mmap[UVIDEO_MAX_BUFFERS];
uint8_t *sc_mmap_buffer;
size_t sc_mmap_buffer_size;
- q_mmap sc_mmap_q;
int sc_mmap_count;
int sc_mmap_flag;
+ struct uvideo_mmap_queue sc_mmap_q_in;
+ struct uvideo_mmap_queue sc_mmap_q_out;
+
struct vnode *sc_vp;
struct usb_task sc_task_write;
@@ -173,6 +190,8 @@ void uvideo_vs_decode_stream_header(str
void uvideo_vs_decode_stream_header_isight(struct uvideo_softc *,
uint8_t *, int);
void uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int);
+int uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *);
+int uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *, int);
void uvideo_read(struct uvideo_softc *, uint8_t *, int);
usbd_status uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t,
uint16_t, uint8_t *, size_t);
@@ -669,8 +688,6 @@ uvideo_attach_hook(struct device *self)
if (error != USBD_NORMAL_COMPLETION)
return;
- /* init mmap queue */
- SIMPLEQ_INIT(&sc->sc_mmap_q);
sc->sc_mmap_count = 0;
DPRINTF(1, "uvideo_attach: doing video_attach_mi\n");
@@ -1960,9 +1977,6 @@ uvideo_vs_free_frame(struct uvideo_softc
sc->sc_mmap_buffer_size = 0;
}
- while (!SIMPLEQ_EMPTY(&sc->sc_mmap_q))
- SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
-
sc->sc_mmap_count = 0;
}
@@ -2498,6 +2512,57 @@ uvideo_vs_decode_stream_header_isight(st
}
}
+static inline int
+uvideo_mmap_queue_op(struct uvideo_mmap_queue_item *items,
+ unsigned long *seq, int dequeue, int idx) {
+ long diff;
+ unsigned long s;
+ struct uvideo_mmap_queue_item *item;
+
+ for (;;) {
+ s = atomic_load_long(seq);
+ item = &items[(int)(s % UVIDEO_MMAP_QUEUE_SIZE)];
+ diff = item->qi_seq - (s + dequeue);
+
+ if (diff == 0) {
+ /* item can be locked */
+ if (atomic_cas_ulong(seq, s, s + 1) != s)
+ continue;
+
+ /* it is locked */
+
+ if (dequeue) {
+ idx = item->qi_idx;
+ item->qi_seq = s + UVIDEO_MMAP_QUEUE_SIZE;
+ return (idx);
+ } else {
+ item->qi_idx = idx;
+ item->qi_seq++;
+ return (0);
+ }
+ }
+
+ if (diff < 0) {
+ /* queue is full */
+ return (-1);
+ }
+
+ /* diff > 0 - means another thread moved sequence */
+ }
+}
+
+int
+uvideo_mmap_queue_dequeue(struct uvideo_mmap_queue *q)
+{
+ return uvideo_mmap_queue_op(q->q_item, &q->q_tail_seq, 1, -1);
+}
+
+int
+uvideo_mmap_queue_enqueue(struct uvideo_mmap_queue *q, int idx)
+{
+ return uvideo_mmap_queue_op(q->q_item, &q->q_head_seq, 0, idx);
+}
+
void
uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err)
{
@@ -2507,11 +2572,9 @@ uvideo_mmap_queue(struct uvideo_softc *s
panic("%s: mmap buffers not allocated", __func__);
/* find a buffer which is ready for queueing */
- for (i = 0; i < sc->sc_mmap_count; i++) {
- if (sc->sc_mmap[i].v4l2_buf.flags & V4L2_BUF_FLAG_QUEUED)
- break;
- }
- if (i == sc->sc_mmap_count) {
+ i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_in);
+
+ if (i < 0) {
DPRINTF(1, "%s: %s: mmap queue is full!\n",
DEVNAME(sc), __func__);
return;
@@ -2537,7 +2600,10 @@ uvideo_mmap_queue(struct uvideo_softc *s
/* queue it */
sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE;
sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED;
- SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames);
+
+ if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_out, i))
+ panic("%s: mmap queue overflow", __func__);
+
DPRINTF(2, "%s: %s: frame queued on index %d\n",
DEVNAME(sc), __func__, i);
@@ -3553,6 +3619,20 @@ uvideo_reqbufs(void *v, struct v4l2_requ
sc->sc_mmap[i].v4l2_buf.length);
}
+ /* reset queues */
+ for (i = 0; i < UVIDEO_MMAP_QUEUE_SIZE; i++) {
+ sc->sc_mmap_q_in.q_item[i].qi_seq = i;
+ sc->sc_mmap_q_in.q_item[i].qi_idx = -1;
+
+ sc->sc_mmap_q_out.q_item[i].qi_seq = i;
+ sc->sc_mmap_q_out.q_item[i].qi_idx = -1;
+ }
+
+ sc->sc_mmap_q_in.q_head_seq = 0;
+ sc->sc_mmap_q_in.q_tail_seq = 0;
+ sc->sc_mmap_q_out.q_head_seq = 0;
+ sc->sc_mmap_q_out.q_tail_seq = 0;
+
/* tell how many buffers we have really allocated */
rb->count = sc->sc_mmap_count;
@@ -3586,6 +3666,7 @@ uvideo_querybuf(void *v, struct v4l2_buf
int
uvideo_qbuf(void *v, struct v4l2_buffer *qb)
{
+ int flags;
struct uvideo_softc *sc = v;
if (qb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
@@ -3593,9 +3674,15 @@ uvideo_qbuf(void *v, struct v4l2_buffer
qb->index >= sc->sc_mmap_count)
return (EINVAL);
+ flags = sc->sc_mmap[qb->index].v4l2_buf.flags;
sc->sc_mmap[qb->index].v4l2_buf.flags &= ~V4L2_BUF_FLAG_DONE;
sc->sc_mmap[qb->index].v4l2_buf.flags |= V4L2_BUF_FLAG_QUEUED;
+ if (uvideo_mmap_queue_enqueue(&sc->sc_mmap_q_in, qb->index)) {
+ sc->sc_mmap[qb->index].v4l2_buf.flags = flags;
+ return (EINVAL);
+ }
+
DPRINTF(2, "%s: %s: buffer on index %d ready for queueing\n",
DEVNAME(sc), __func__, qb->index);
@@ -3605,24 +3692,26 @@ uvideo_qbuf(void *v, struct v4l2_buffer
int
uvideo_dqbuf(void *v, struct v4l2_buffer *dqb)
{
- struct uvideo_softc *sc = v;
struct uvideo_mmap *mmap;
- int error;
+ struct uvideo_softc *sc = v;
+ int error, i;
if (dqb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
dqb->memory != V4L2_MEMORY_MMAP)
return (EINVAL);
- if (SIMPLEQ_EMPTY(&sc->sc_mmap_q)) {
+again:
+ i = uvideo_mmap_queue_dequeue(&sc->sc_mmap_q_out);
+
+ if (i < 0) {
/* mmap queue is empty, block until first frame is queued */
error = tsleep_nsec(sc, 0, "vid_mmap", SEC_TO_NSEC(10));
if (error)
return (EINVAL);
+ goto again;
}
- mmap = SIMPLEQ_FIRST(&sc->sc_mmap_q);
- if (mmap == NULL)
- panic("uvideo_dqbuf: NULL pointer!");
+ mmap = &sc->sc_mmap[i];
bcopy(&mmap->v4l2_buf, dqb, sizeof(struct v4l2_buffer));
@@ -3631,7 +3720,6 @@ uvideo_dqbuf(void *v, struct v4l2_buffer
DPRINTF(2, "%s: %s: frame dequeued from index %d\n",
DEVNAME(sc), __func__, mmap->v4l2_buf.index);
- SIMPLEQ_REMOVE_HEAD(&sc->sc_mmap_q, q_frames);
return (0);
}
Index: sys/dev/usb/uvideo.h
===================================================================
RCS file: /home/cvs/src/sys/dev/usb/uvideo.h,v
diff -u -p -r1.64 uvideo.h
--- sys/dev/usb/uvideo.h 26 Feb 2025 20:50:46 -0000 1.64
+++ sys/dev/usb/uvideo.h 16 Mar 2025 10:00:52 -0000
@@ -17,7 +17,6 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#include <sys/queue.h>
#include <sys/videoio.h>
/*
@@ -553,11 +552,9 @@ struct uvideo_frame_buffer {
*/
#define UVIDEO_MAX_BUFFERS 8
struct uvideo_mmap {
- SIMPLEQ_ENTRY(uvideo_mmap) q_frames;
uint8_t *buf;
struct v4l2_buffer v4l2_buf;
};
-typedef SIMPLEQ_HEAD(, uvideo_mmap) q_mmap;
struct uvideo_format_group {
uint32_t pixelformat;
sys/uvideo: avoid using queue.h