From: "Ted Unangst" Subject: kqueue event max To: tech@openbsd.org Date: Mon, 12 May 2025 04:52:36 -0400 Switch max timer events to using the same limit as user events. These are roughly in the class, things we don't want userland to run away with, so I think it's reasonable to simply combine and share the limit. Ideally, these limits should be using uidinfo, but that requires some more work to account for setuid changes. Consolidating the checks is a step on the way. Index: kern_event.c =================================================================== RCS file: /home/cvs/src/sys/kern/kern_event.c,v diff -u -p -r1.203 kern_event.c --- kern_event.c 11 May 2025 20:03:08 -0000 1.203 +++ kern_event.c 12 May 2025 08:42:02 -0000 @@ -206,8 +206,6 @@ struct pool knote_pool; struct pool kqueue_pool; struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl"); -int kq_ntimeouts = 0; /* [a] */ -int kq_timeoutmax = (4 * 1024); /* [I] */ unsigned int kq_usereventsmax = 1024; /* per process */ #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) @@ -698,16 +696,19 @@ filt_timerexpire(void *knx) int filt_timerattach(struct knote *kn) { + struct filedesc *fdp = kn->kn_kq->kq_fdp; struct timespec ts; struct filt_timer *ft; + u_int nuserevents; int error; error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); if (error != 0) return (error); - if (atomic_inc_int_nv(&kq_ntimeouts) > kq_timeoutmax) { - atomic_dec_int(&kq_ntimeouts); + nuserevents = atomic_inc_int_nv(&fdp->fd_nuserevents); + if (nuserevents > atomic_load_int(&kq_usereventsmax)) { + atomic_dec_int(&fdp->fd_nuserevents); return (ENOMEM); } @@ -729,6 +730,7 @@ filt_timerattach(struct knote *kn) void filt_timerdetach(struct knote *kn) { + struct filedesc *fdp = kn->kn_kq->kq_fdp; struct filt_timer *ft = kn->kn_hook; mtx_enter(&ft->ft_mtx); @@ -737,7 +739,7 @@ filt_timerdetach(struct knote *kn) timeout_del_barrier(&ft->ft_to); free(ft, M_KEVENT, sizeof(*ft)); - atomic_dec_int(&kq_ntimeouts); + atomic_dec_int(&fdp->fd_nuserevents); } int Index: kern_sysctl.c =================================================================== RCS file: /home/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.468 kern_sysctl.c --- kern_sysctl.c 9 May 2025 14:53:22 -0000 1.468 +++ kern_sysctl.c 12 May 2025 08:34:03 -0000 @@ -2606,6 +2606,7 @@ sysctl_diskinit(int update, struct proc sdk->ds_attachtime = dk->dk_attachtime; sdk->ds_timestamp = dk->dk_timestamp; sdk->ds_time = dk->dk_time; + sdk->ds_qtime = dk->dk_qtime; mtx_leave(&dk->dk_mtx); sdk++; } @@ -2628,7 +2629,7 @@ sysctl_diskinit(int update, struct proc sdk->ds_wbytes = dk->dk_wbytes; sdk->ds_attachtime = dk->dk_attachtime; sdk->ds_timestamp = dk->dk_timestamp; - sdk->ds_time = dk->dk_time; + sdk->ds_qtime = dk->dk_qtime; mtx_leave(&dk->dk_mtx); sdk++; } Index: subr_disk.c =================================================================== RCS file: /home/cvs/src/sys/kern/subr_disk.c,v diff -u -p -r1.273 subr_disk.c --- subr_disk.c 30 Oct 2024 06:16:27 -0000 1.273 +++ subr_disk.c 2 May 2025 04:30:48 -0000 @@ -1256,15 +1256,27 @@ disk_busy(struct disk *diskp) void disk_unbusy(struct disk *diskp, long bcount, daddr_t blkno, int read) { + disk_unbusy2(diskp, bcount, blkno, read, NULL); +} +void +disk_unbusy2(struct disk *diskp, long bcount, daddr_t blkno, int read, + struct buf *bp) +{ struct timeval dv_time, diff_time; + struct timespec fintime, qtime; mtx_enter(&diskp->dk_mtx); if (diskp->dk_busy-- == 0) printf("disk_unbusy: %s: 
 
-	microuptime(&dv_time);
+	if (bp != NULL) {
+		nanoruntime(&fintime);
+		timespecsub(&fintime, &bp->b_qtime, &qtime);
+		timespecadd(&diskp->dk_qtime, &qtime, &diskp->dk_qtime);
+	}
+	microuptime(&dv_time);
 	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
 	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
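
For testing, something like the sketch below (not part of the diff) will
exercise the limit: keep attaching EVFILT_TIMER events until kevent(2)
fails. Previously the cutoff was the global kq_timeoutmax (4 * 1024),
shared across all processes; with this diff each process should get
ENOMEM on its own, once its combined user and timer event count passes
kq_usereventsmax (1024).

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent kev;
	int kq, i;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	for (i = 0; ; i++) {
		/* 60s periodic timers; they stay attached so the count grows */
		EV_SET(&kev, i, EVFILT_TIMER, EV_ADD, 0, 60 * 1000, NULL);
		if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) {
			warn("kevent");
			break;
		}
	}
	printf("attached %d timers\n", i);
	return 0;
}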