Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
wired pages vs sysctl(2)
To:
tech@openbsd.org
Cc:
kettenis@openbsd.org
Date:
Thu, 27 Feb 2025 14:44:13 +0100

Download raw body.

Thread
Since the 90s sysctl(2) has been wiring pages that contain the syscall
arguments. 

In uvm/uvm_fault.c r1.129 kettenis@ fixed one of the races exposed by
syzkaller where a sibling thread munmap(2)ed the range currently wired
by sysctl(2).

Sadly the fix is incomplete and the reproducer attached triggers the
following panic:

panic: uvm_fault_unwire_locked: address not in map
Stopped at      db_enter+0x14:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*129832   2744      0         0x3          0    0  sysctl_wired
  84116  96377      0        0x10        0x8    2  sshd-session
   6897  50184      0     0x14000      0x200    1  softnet0
db_enter() at db_enter+0x14
panic(ffffffff8261d529) at panic+0xdd
uvm_fault_unwire_locked(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire
_locked+0x246
uvm_fault_unwire(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire+0x44
sysctl_vsunlock(20000040,0) at sysctl_vsunlock+0x46
net_sysctl(ffff800055d411b4,3,20000040,ffff800055d411e8,20000080,c,cf641d03c835
bbeb) at net_sysctl+0x309
sys_sysctl(ffff800055c99c78,ffff800055d41300,ffff800055d41270) at sys_sysctl+0x
1f8
syscall(ffff800055d41300) at syscall+0x5ec
Xsyscall() at Xsyscall+0x128
 
Syzkaller has two reports for this:
  https://syzkaller.appspot.com/bug?extid=d9b926edfd5f64a66c58
  https://syzkaller.appspot.com/bug?extid=a12540517e3a76a6a490

It is not clear to my why vslock(9) is needed today.  That said the diff
below completes kettenis@'s approach by making vslock similar to mlock(2).
With it, vsunlock() now silently fails without crashing the kernel and I
can no longer reproduce the panic.  

ok?

diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c
index cf6ea625571..e0f2ee8e08e 100644
--- sys/uvm/uvm_fault.c
+++ sys/uvm/uvm_fault.c
@@ -1782,16 +1782,12 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
 		 * find the map entry for the current address.
 		 */
 		KASSERT(va >= entry->start);
-		while (entry && va >= entry->end) {
+		while (va >= entry->end) {
 			next = RBT_NEXT(uvm_map_addr, entry);
+			KASSERT(next != NULL && next->start <= entry->end);
 			entry = next;
 		}
 
-		if (entry == NULL)
-			return;
-		if (va < entry->start)
-			continue;
-
 		/*
 		 * lock it.
 		 */
@@ -1819,7 +1815,7 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
 	}
 
 	if (oentry != NULL) {
-		uvm_map_unlock_entry(oentry);
+		uvm_map_unlock_entry(entry);
 	}
 }
 
diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c
index 0d7bd9ea10b..63c6f8393d6 100644
--- sys/uvm/uvm_glue.c
+++ sys/uvm/uvm_glue.c
@@ -114,7 +114,7 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
 	if (end <= start)
 		return (EINVAL);
 
-	return uvm_fault_wire(map, start, end, access_type);
+	return uvm_map_pageable(map, start, end, FALSE, 0);
 }
 
 /*
@@ -125,13 +125,14 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
 void
 uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
 {
+	struct vm_map *map = &p->p_vmspace->vm_map;
 	vaddr_t start, end;
 
 	start = trunc_page((vaddr_t)addr);
 	end = round_page((vaddr_t)addr + len);
 	KASSERT(end > start);
 
-	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
+	uvm_map_pageable(map, start, end, TRUE, 0);
 }
 
 /*
/*	$OpenBSD$ */

/*
 * MT race reproducer: unmap memory range while sysctl(2) marked it wired.
 */

#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <err.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static pthread_mutex_t gmtx = PTHREAD_MUTEX_INITIALIZER;

size_t addr = 0x20000000UL;
size_t len = 0x01000000UL;

void *
thr_unmap(void *arg)
{
	pthread_mutex_lock(&gmtx);

	for (;;)
		munmap((void *)addr, 0x4000UL);

	return NULL;
}

int
main(void)
{
	pthread_t th;
	size_t *olenp;
	void *oldp, *newp;
	char *pg;
	int i, error, pgsz, *mib;

	pgsz = getpagesize();

	mmap((void *)addr, len, PROT_READ|PROT_WRITE,
	    MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);

	/* fault in all pages. */
	for (i = 0; i < len; i+= pgsz) {
		pg = (char *)(addr + i);
		pg[0] = 1;
	}

	mib = (int *)(addr + 0xc0);
	mib[0] = CTL_NET;
	mib[1] = AF_INET6;
	mib[2] = IPPROTO_ICMPV6;
	mib[3] = ICMPV6CTL_REDIRTIMEOUT;

	oldp = (void *)(addr + 0x40);

	olenp = (size_t *)(addr + 0x100);
	*olenp = 0;

	/* new arguments */
	newp = (char *)(addr + 0x80);
	memcpy(newp, "somegarbage", 12);

	pthread_mutex_lock(&gmtx);
	/* competing thread. */
	error = pthread_create(&th, NULL, thr_unmap, NULL);
	if (error)
		warn("pthread_create");

	/* Ensure memory is still mapped. */
	printf("%s\n", (char *)newp);
	pthread_mutex_unlock(&gmtx);
	for (i = 0; i < (100 * 100); i++) {
		error = sysctl(mib, 4, oldp, olenp, newp, 12);
	}

	return 0;
}