Download raw body.
wired pages vs sysctl(2)
Since the 90s sysctl(2) has been wiring pages that contain the syscall
arguments.
In uvm/uvm_fault.c r1.129 kettenis@ fixed one of the races exposed by
syzkaller where a sibling thread munmap(2)ed the range currently wired
by sysctl(2).
Sadly the fix is incomplete and the reproducer attached triggers the
following panic:
panic: uvm_fault_unwire_locked: address not in map
Stopped at db_enter+0x14: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
*129832 2744 0 0x3 0 0 sysctl_wired
84116 96377 0 0x10 0x8 2 sshd-session
6897 50184 0 0x14000 0x200 1 softnet0
db_enter() at db_enter+0x14
panic(ffffffff8261d529) at panic+0xdd
uvm_fault_unwire_locked(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire
_locked+0x246
uvm_fault_unwire(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire+0x44
sysctl_vsunlock(20000040,0) at sysctl_vsunlock+0x46
net_sysctl(ffff800055d411b4,3,20000040,ffff800055d411e8,20000080,c,cf641d03c835
bbeb) at net_sysctl+0x309
sys_sysctl(ffff800055c99c78,ffff800055d41300,ffff800055d41270) at sys_sysctl+0x
1f8
syscall(ffff800055d41300) at syscall+0x5ec
Xsyscall() at Xsyscall+0x128
Syzkaller has two reports for this:
https://syzkaller.appspot.com/bug?extid=d9b926edfd5f64a66c58
https://syzkaller.appspot.com/bug?extid=a12540517e3a76a6a490
It is not clear to my why vslock(9) is needed today. That said the diff
below completes kettenis@'s approach by making vslock similar to mlock(2).
With it, vsunlock() now silently fails without crashing the kernel and I
can no longer reproduce the panic.
ok?
diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c
index cf6ea625571..e0f2ee8e08e 100644
--- sys/uvm/uvm_fault.c
+++ sys/uvm/uvm_fault.c
@@ -1782,16 +1782,12 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
* find the map entry for the current address.
*/
KASSERT(va >= entry->start);
- while (entry && va >= entry->end) {
+ while (va >= entry->end) {
next = RBT_NEXT(uvm_map_addr, entry);
+ KASSERT(next != NULL && next->start <= entry->end);
entry = next;
}
- if (entry == NULL)
- return;
- if (va < entry->start)
- continue;
-
/*
* lock it.
*/
@@ -1819,7 +1815,7 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
}
if (oentry != NULL) {
- uvm_map_unlock_entry(oentry);
+ uvm_map_unlock_entry(entry);
}
}
diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c
index 0d7bd9ea10b..63c6f8393d6 100644
--- sys/uvm/uvm_glue.c
+++ sys/uvm/uvm_glue.c
@@ -114,7 +114,7 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
if (end <= start)
return (EINVAL);
- return uvm_fault_wire(map, start, end, access_type);
+ return uvm_map_pageable(map, start, end, FALSE, 0);
}
/*
@@ -125,13 +125,14 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
void
uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
{
+ struct vm_map *map = &p->p_vmspace->vm_map;
vaddr_t start, end;
start = trunc_page((vaddr_t)addr);
end = round_page((vaddr_t)addr + len);
KASSERT(end > start);
- uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
+ uvm_map_pageable(map, start, end, TRUE, 0);
}
/*
/* $OpenBSD$ */
/*
* MT race reproducer: unmap memory range while sysctl(2) marked it wired.
*/
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <err.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
static pthread_mutex_t gmtx = PTHREAD_MUTEX_INITIALIZER;
size_t addr = 0x20000000UL;
size_t len = 0x01000000UL;
void *
thr_unmap(void *arg)
{
pthread_mutex_lock(&gmtx);
for (;;)
munmap((void *)addr, 0x4000UL);
return NULL;
}
int
main(void)
{
pthread_t th;
size_t *olenp;
void *oldp, *newp;
char *pg;
int i, error, pgsz, *mib;
pgsz = getpagesize();
mmap((void *)addr, len, PROT_READ|PROT_WRITE,
MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
/* fault in all pages. */
for (i = 0; i < len; i+= pgsz) {
pg = (char *)(addr + i);
pg[0] = 1;
}
mib = (int *)(addr + 0xc0);
mib[0] = CTL_NET;
mib[1] = AF_INET6;
mib[2] = IPPROTO_ICMPV6;
mib[3] = ICMPV6CTL_REDIRTIMEOUT;
oldp = (void *)(addr + 0x40);
olenp = (size_t *)(addr + 0x100);
*olenp = 0;
/* new arguments */
newp = (char *)(addr + 0x80);
memcpy(newp, "somegarbage", 12);
pthread_mutex_lock(&gmtx);
/* competing thread. */
error = pthread_create(&th, NULL, thr_unmap, NULL);
if (error)
warn("pthread_create");
/* Ensure memory is still mapped. */
printf("%s\n", (char *)newp);
pthread_mutex_unlock(&gmtx);
for (i = 0; i < (100 * 100); i++) {
error = sysctl(mib, 4, oldp, olenp, newp, 12);
}
return 0;
}
wired pages vs sysctl(2)