Index | Thread | Search

From:
Marcus Glocker <marcus@nazgul.ch>
Subject:
Re: sys/uvideo: avoid one bcopy when reads into mmaped memory
To:
"Kirill A. Korinsky" <kirill@korins.ky>
Cc:
tech@openbsd.org
Date:
Sat, 1 Mar 2025 09:38:03 +0100

Download raw body.

Thread
On Wed, Feb 26, 2025 at 11:51:34PM GMT, Kirill A. Korinsky wrote:

> On Wed, 26 Feb 2025 23:10:01 +0100,
> Kirill A. Korinsky <kirill@korins.ky> wrote:
> > 
> > tech@,
> > 
> > Here a diff which removes one bcopy for each frame which is read from
> > webcamp into mmaped memory.
> > 
> > Feedback? Ok?
> >
> 
> Here the diff which is based on the latest version of uvideo.

I like the idea.

Though, I would prefer if we could do the selection of the buffer in
the isoc case before we enter the isoc frame loop, instead of calling
the buffer selection function every time in the stream header function.

Also, I would leave the V4L2_BUF_FLAG_QUEUED flagging for when we
insert the new frame in to our queue.

Here is a slightly adapted version of your diff, which considers the
above points, as an alternative.

I can't test the bulk case here currently ...


Index: sys/dev/usb/uvideo.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/uvideo.c,v
diff -u -p -u -p -r1.246 uvideo.c
--- sys/dev/usb/uvideo.c	26 Feb 2025 21:03:52 -0000	1.246
+++ sys/dev/usb/uvideo.c	1 Mar 2025 08:26:11 -0000
@@ -66,6 +66,7 @@ struct uvideo_softc {
 	struct uvideo_frame_buffer		 sc_frame_buffer;
 
 	struct uvideo_mmap			 sc_mmap[UVIDEO_MAX_BUFFERS];
+	struct uvideo_mmap			*sc_mmap_cur;
 	uint8_t					*sc_mmap_buffer;
 	size_t					 sc_mmap_buffer_size;
 	q_mmap					 sc_mmap_q;
@@ -103,7 +104,7 @@ struct uvideo_softc {
 	const struct uvideo_devs		*sc_quirk;
 	void					(*sc_decode_stream_header)
 						    (struct uvideo_softc *,
-						    uint8_t *, int);
+						    uint8_t *, int, uint8_t *);
 };
 
 int		uvideo_open(void *, int, int *, uint8_t *, void (*)(void *),
@@ -168,10 +169,11 @@ void		uvideo_vs_start_isoc_ixfer(struct 
 void		uvideo_vs_cb(struct usbd_xfer *, void *,
 		    usbd_status);
 void		uvideo_vs_decode_stream_header(struct uvideo_softc *,
-		    uint8_t *, int); 
+		    uint8_t *, int, uint8_t *); 
 void		uvideo_vs_decode_stream_header_isight(struct uvideo_softc *,
-		    uint8_t *, int);
-void		uvideo_mmap_queue(struct uvideo_softc *, uint8_t *, int, int);
+		    uint8_t *, int, uint8_t *);
+uint8_t		*uvideo_mmap_getbuf(struct uvideo_softc *);
+void		uvideo_mmap_queue(struct uvideo_softc *, int, int);
 void		uvideo_read(struct uvideo_softc *, uint8_t *, int);
 usbd_status	uvideo_usb_control(struct uvideo_softc *, uint8_t, uint8_t,
 		    uint16_t, uint8_t *, size_t);
@@ -2196,6 +2198,7 @@ uvideo_vs_start_bulk_thread(void *arg)
 	struct uvideo_softc *sc = arg;
 	usbd_status error;
 	int size;
+	uint8_t *buf;
 
 	usbd_ref_incr(sc->sc_udev);
 	while (sc->sc_vs_cur->bulk_running) {
@@ -2222,7 +2225,14 @@ uvideo_vs_start_bulk_thread(void *arg)
 
 		DPRINTF(2, "%s: *** buffer len = %d\n", DEVNAME(sc), size);
 
-		sc->sc_decode_stream_header(sc, sc->sc_vs_cur->bxfer.buf, size);
+		if (sc->sc_mmap_flag) {
+			if ((buf = uvideo_mmap_getbuf(sc)) == NULL)
+				continue;
+		} else
+			buf = sc->sc_frame_buffer.buf;
+
+		sc->sc_decode_stream_header(sc, sc->sc_vs_cur->bxfer.buf, size,
+		    buf);
 	}
 	usbd_ref_decr(sc->sc_udev);
 
@@ -2276,7 +2286,7 @@ uvideo_vs_cb(struct usbd_xfer *xfer, voi
 	struct uvideo_isoc_xfer *ixfer = priv;
 	struct uvideo_softc *sc = ixfer->sc;
 	int len, i, frame_size;
-	uint8_t *frame;
+	uint8_t *frame, *buf;
 
 	DPRINTF(2, "%s: %s\n", DEVNAME(sc), __func__);
 
@@ -2291,6 +2301,12 @@ uvideo_vs_cb(struct usbd_xfer *xfer, voi
 	if (len == 0)
 		goto skip;
 
+	if (sc->sc_mmap_flag) {
+		if ((buf = uvideo_mmap_getbuf(sc)) == NULL)
+			goto skip;
+	} else
+		buf = sc->sc_frame_buffer.buf;
+
 	for (i = 0; i < sc->sc_nframes; i++) {
 		frame = ixfer->buf + (i * sc->sc_vs_cur->psize);
 		frame_size = ixfer->size[i];
@@ -2299,7 +2315,7 @@ uvideo_vs_cb(struct usbd_xfer *xfer, voi
 			/* frame is empty */
 			continue;
 
-		sc->sc_decode_stream_header(sc, frame, frame_size);
+		sc->sc_decode_stream_header(sc, frame, frame_size, buf);
 	}
 
 skip:	/* setup new transfer */
@@ -2308,7 +2324,7 @@ skip:	/* setup new transfer */
 
 void
 uvideo_vs_decode_stream_header(struct uvideo_softc *sc, uint8_t *frame,
-    int frame_size)
+    int frame_size, uint8_t *buf)
 {
 	struct uvideo_frame_buffer *fb = &sc->sc_frame_buffer;
 	struct usb_video_stream_header *sh;
@@ -2371,7 +2387,7 @@ uvideo_vs_decode_stream_header(struct uv
 		fb->error = 1;
 	}
 	if (sample_len > 0) {
-		bcopy(frame + sh->bLength, fb->buf + fb->offset, sample_len);
+		bcopy(frame + sh->bLength, buf + fb->offset, sample_len);
 		fb->offset += sample_len;
 	}
 
@@ -2394,7 +2410,7 @@ uvideo_vs_decode_stream_header(struct uv
 #endif
 		if (sc->sc_mmap_flag) {
 			/* mmap */
-			uvideo_mmap_queue(sc, fb->buf, fb->offset, fb->error);
+			uvideo_mmap_queue(sc, fb->offset, fb->error);
 		} else if (fb->error) {
 			DPRINTF(1, "%s: %s: error frame, skipped!\n",
 			    DEVNAME(sc), __func__);
@@ -2427,7 +2443,7 @@ uvideo_vs_decode_stream_header(struct uv
  */
 void
 uvideo_vs_decode_stream_header_isight(struct uvideo_softc *sc, uint8_t *frame,
-    int frame_size)
+    int frame_size, uint8_t *buf)
 {
 	struct uvideo_frame_buffer *fb = &sc->sc_frame_buffer;
 	int sample_len, header = 0;
@@ -2448,7 +2464,7 @@ uvideo_vs_decode_stream_header_isight(st
 	if (header) {
 		if (sc->sc_mmap_flag) {
 			/* mmap */
-			uvideo_mmap_queue(sc, fb->buf, fb->offset, 0);
+			uvideo_mmap_queue(sc, fb->offset, 0);
 		} else {
 			/* read */
 			uvideo_read(sc, fb->buf, fb->offset);
@@ -2458,14 +2474,14 @@ uvideo_vs_decode_stream_header_isight(st
 		/* save sample */
 		sample_len = frame_size;
 		if ((fb->offset + sample_len) <= fb->buf_size) {
-			bcopy(frame, fb->buf + fb->offset, sample_len);
+			bcopy(frame, buf + fb->offset, sample_len);
 			fb->offset += sample_len;
 		}
 	}
 }
 
-void
-uvideo_mmap_queue(struct uvideo_softc *sc, uint8_t *buf, int len, int err)
+uint8_t *
+uvideo_mmap_getbuf(struct uvideo_softc *sc)
 {
 	int i;
 
@@ -2480,32 +2496,39 @@ uvideo_mmap_queue(struct uvideo_softc *s
 	if (i == sc->sc_mmap_count) {
 		DPRINTF(1, "%s: %s: mmap queue is full!\n",
 		    DEVNAME(sc), __func__);
-		return;
+		return NULL;
 	}
 
+	sc->sc_mmap_cur = &sc->sc_mmap[i];
+
+	return sc->sc_mmap_cur->buf;
+}
+
+void
+uvideo_mmap_queue(struct uvideo_softc *sc, int len, int err)
+{
 	/* copy frame to mmap buffer and report length */
-	bcopy(buf, sc->sc_mmap[i].buf, len);
-	sc->sc_mmap[i].v4l2_buf.bytesused = len;
+	sc->sc_mmap_cur->v4l2_buf.bytesused = len;
 
 	/* timestamp it */
-	getmicrouptime(&sc->sc_mmap[i].v4l2_buf.timestamp);
-	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_TIMESTAMP_MASK;
-	sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
-	sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
-	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_TIMECODE;
+	getmicrouptime(&sc->sc_mmap_cur->v4l2_buf.timestamp);
+	sc->sc_mmap_cur->v4l2_buf.flags &= ~V4L2_BUF_FLAG_TIMESTAMP_MASK;
+	sc->sc_mmap_cur->v4l2_buf.flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	sc->sc_mmap_cur->v4l2_buf.flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
+	sc->sc_mmap_cur->v4l2_buf.flags |= V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
+	sc->sc_mmap_cur->v4l2_buf.flags &= ~V4L2_BUF_FLAG_TIMECODE;
 
 	/* forward error bit */
-	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_ERROR;
+	sc->sc_mmap_cur->v4l2_buf.flags &= ~V4L2_BUF_FLAG_ERROR;
 	if (err)
-		sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_ERROR;
+		sc->sc_mmap_cur->v4l2_buf.flags |= V4L2_BUF_FLAG_ERROR;
 
 	/* queue it */
-	sc->sc_mmap[i].v4l2_buf.flags |= V4L2_BUF_FLAG_DONE;
-	sc->sc_mmap[i].v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED;
-	SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, &sc->sc_mmap[i], q_frames);
-	DPRINTF(2, "%s: %s: frame queued on index %d\n",
-	    DEVNAME(sc), __func__, i);
+	sc->sc_mmap_cur->v4l2_buf.flags |= V4L2_BUF_FLAG_DONE;
+	sc->sc_mmap_cur->v4l2_buf.flags &= ~V4L2_BUF_FLAG_QUEUED;
+	SIMPLEQ_INSERT_TAIL(&sc->sc_mmap_q, sc->sc_mmap_cur, q_frames);
+	sc->sc_mmap_cur = NULL;
+	DPRINTF(2, "%s: %s: frame queued\n", DEVNAME(sc), __func__);
 
 	wakeup(sc);