Index | Thread | Search

From:
Visa Hankala <visa@hankala.org>
Subject:
Re: kqueue event max
To:
Ted Unangst <tedu@tedunangst.com>
Cc:
tech@openbsd.org
Date:
Mon, 12 May 2025 14:14:11 +0000

Download raw body.

Thread
On Mon, May 12, 2025 at 04:52:36AM -0400, Ted Unangst wrote:
> Switch max timer events to use the same limit as user events. These are 
> roughly in the same class, things we don't want userland to run away with, so 
> I think it's reasonable to simply combine and share the limit.
> 
> Ideally, these limits should be using uidinfo, but that requires some 
> more work to account for setuid changes. Consolidating the checks
> is a step on the way.

I think it is reasonable to use the same limit, especially if it
becomes necessary to make the limit configurable.

kern_event.c changes OK visa@

> Index: kern_event.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_event.c,v
> diff -u -p -r1.203 kern_event.c
> --- kern_event.c	11 May 2025 20:03:08 -0000	1.203
> +++ kern_event.c	12 May 2025 08:42:02 -0000
> @@ -206,8 +206,6 @@ struct	pool knote_pool;
>  struct	pool kqueue_pool;
>  struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
>  struct	rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl");
> -int kq_ntimeouts = 0;			/* [a] */
> -int kq_timeoutmax = (4 * 1024);		/* [I] */
>  unsigned int kq_usereventsmax = 1024;	/* per process */
>  
>  #define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
> @@ -698,16 +696,19 @@ filt_timerexpire(void *knx)
>  int
>  filt_timerattach(struct knote *kn)
>  {
> +	struct filedesc *fdp = kn->kn_kq->kq_fdp;
>  	struct timespec ts;
>  	struct filt_timer *ft;
> +	u_int nuserevents;
>  	int error;
>  
>  	error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts);
>  	if (error != 0)
>  		return (error);
>  
> -	if (atomic_inc_int_nv(&kq_ntimeouts) > kq_timeoutmax) {
> -		atomic_dec_int(&kq_ntimeouts);
> +	nuserevents = atomic_inc_int_nv(&fdp->fd_nuserevents);
> +	if (nuserevents > atomic_load_int(&kq_usereventsmax)) {
> +		atomic_dec_int(&fdp->fd_nuserevents);
>  		return (ENOMEM);
>  	}
>  
> @@ -729,6 +730,7 @@ filt_timerattach(struct knote *kn)
>  void
>  filt_timerdetach(struct knote *kn)
>  {
> +	struct filedesc *fdp = kn->kn_kq->kq_fdp;
>  	struct filt_timer *ft = kn->kn_hook;
>  
>  	mtx_enter(&ft->ft_mtx);
> @@ -737,7 +739,7 @@ filt_timerdetach(struct knote *kn)
>  
>  	timeout_del_barrier(&ft->ft_to);
>  	free(ft, M_KEVENT, sizeof(*ft));
> -	atomic_dec_int(&kq_ntimeouts);
> +	atomic_dec_int(&fdp->fd_nuserevents);
>  }
>  
>  int
> Index: kern_sysctl.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.468 kern_sysctl.c
> --- kern_sysctl.c	9 May 2025 14:53:22 -0000	1.468
> +++ kern_sysctl.c	12 May 2025 08:34:03 -0000
> @@ -2606,6 +2606,7 @@ sysctl_diskinit(int update, struct proc 
>  			sdk->ds_attachtime = dk->dk_attachtime;
>  			sdk->ds_timestamp = dk->dk_timestamp;
>  			sdk->ds_time = dk->dk_time;
> +			sdk->ds_qtime = dk->dk_qtime;
>  			mtx_leave(&dk->dk_mtx);
>  			sdk++;
>  		}
> @@ -2628,7 +2629,7 @@ sysctl_diskinit(int update, struct proc 
>  			sdk->ds_wbytes = dk->dk_wbytes;
>  			sdk->ds_attachtime = dk->dk_attachtime;
>  			sdk->ds_timestamp = dk->dk_timestamp;
> -			sdk->ds_time = dk->dk_time;
> +			sdk->ds_qtime = dk->dk_qtime;
>  			mtx_leave(&dk->dk_mtx);
>  			sdk++;
>  		}
> Index: subr_disk.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/subr_disk.c,v
> diff -u -p -r1.273 subr_disk.c
> --- subr_disk.c	30 Oct 2024 06:16:27 -0000	1.273
> +++ subr_disk.c	2 May 2025 04:30:48 -0000
> @@ -1256,15 +1256,27 @@ disk_busy(struct disk *diskp)
>  void
>  disk_unbusy(struct disk *diskp, long bcount, daddr_t blkno, int read)
>  {
> +	disk_unbusy2(diskp, bcount, blkno, read, NULL);
> +}
> +void
> +disk_unbusy2(struct disk *diskp, long bcount, daddr_t blkno, int read,
> +	struct buf *bp)
> +{
>  	struct timeval dv_time, diff_time;
> +	struct timespec fintime, qtime;
>  
>  	mtx_enter(&diskp->dk_mtx);
>  
>  	if (diskp->dk_busy-- == 0)
>  		printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);
>  
> -	microuptime(&dv_time);
> +	if (bp != NULL) {
> +		nanoruntime(&fintime);
> +		timespecsub(&fintime, &bp->b_qtime, &qtime);
> +		timespecadd(&diskp->dk_qtime, &qtime, &diskp->dk_qtime);
> +	}
>  
> +	microuptime(&dv_time);
>  	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
>  	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
>  
>