From: Vitaliy Makkoveev Subject: Push netlock down to ifconf() and move copyout() out of netlock To: Alexander Bluhm , tech@openbsd.org Date: Thu, 13 Nov 2025 21:44:46 +0300 Link interface and address descriptors to the `if_tmplist_lock' protected temporary lists while holding netlock and then release it. Do copyout() while following these lists and holding only `if_tmplist_lock'. The pagefault handler releases exclusive netlock while copyout(9) needs to load pages from swap. So in the cases like ifconf() netlock protected lists could became inconsistent. Since we use shared netlock, pagefault handler leaves it held, but this breaks swap on NFS. I doubt someone uses swap on NFS, but we hold netlock for unpredictable time. ok? Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.748 diff -u -p -r1.748 if.c --- sys/net/if.c 13 Nov 2025 16:20:45 -0000 1.748 +++ sys/net/if.c 13 Nov 2025 18:30:36 -0000 @@ -2531,10 +2531,7 @@ ifioctl_get(u_long cmd, caddr_t data) switch(cmd) { case SIOCGIFCONF: - NET_LOCK_SHARED(); - error = ifconf(data); - NET_UNLOCK_SHARED(); - return (error); + return (ifconf(data)); case SIOCIFGCLONERS: return (if_clone_list((struct if_clonereq *)data)); case SIOCGIFGMEMB: @@ -2727,14 +2724,17 @@ if_rxhprio_l3_check(int hdrprio) int ifconf(caddr_t data) { + TAILQ_HEAD(, ifnet) if_tmplist; + TAILQ_HEAD(, ifaddr) addr_tmplist; struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr, *ifrp; - int space = ifc->ifc_len, error; + int space = ifc->ifc_len, error = 0; /* If ifc->ifc_len is 0, fill it in with the needed size and return. */ if (space == 0) { + NET_LOCK_SHARED(); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { struct sockaddr *sa; @@ -2750,31 +2750,52 @@ ifconf(caddr_t data) space += sizeof(ifr); } } + NET_UNLOCK_SHARED(); ifc->ifc_len = space; return (0); } + TAILQ_INIT(&if_tmplist); + TAILQ_INIT(&addr_tmplist); + ifrp = ifc->ifc_req; memset(&ifr, 0, sizeof(ifr)); + + rw_enter_write(&if_tmplist_lock); + NET_LOCK_SHARED(); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { + if_ref(ifp); + TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist); + } + NET_UNLOCK_SHARED(); + + TAILQ_FOREACH(ifp, &if_tmplist, if_tmplist) { if (space < sizeof(ifr)) - break; + goto free_iflist; memcpy(ifr.ifr_name, ifp->if_xname, IFNAMSIZ); - if (TAILQ_EMPTY(&ifp->if_addrlist)) { + + NET_LOCK_SHARED(); + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { + ifaref(ifa); + TAILQ_INSERT_TAIL(&addr_tmplist, ifa, ifa_tmplist); + } + NET_UNLOCK_SHARED(); + + if (TAILQ_EMPTY(&addr_tmplist)) { memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr)); error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof(ifr)); if (error) - return (error); + goto free_iflist; space -= sizeof(ifr); ifrp++; - } else - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { + } else { + TAILQ_FOREACH(ifa, &addr_tmplist, ifa_tmplist) { struct sockaddr *sa = ifa->ifa_addr; if (space < sizeof(ifr)) - goto out; + goto free_addrlist; if (sa->sa_len <= sizeof(*sa)) { memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr)); @@ -2782,7 +2803,7 @@ ifconf(caddr_t data) error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof (ifr)); if (error) - return (error); + goto free_addrlist; space -= sizeof(ifr); ifrp++; @@ -2791,27 +2812,55 @@ ifconf(caddr_t data) sa->sa_len; if (space < total) - goto out; + goto free_addrlist; error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof(ifr.ifr_name)); if (error) - return (error); + goto free_addrlist; error = copyout((caddr_t)sa, (caddr_t)&ifrp->ifr_addr, sa->sa_len); if (error) - return (error); + goto free_addrlist; space -= total; ifrp = (struct ifreq *)( (caddr_t)ifrp + total); } } + } + + while((ifa = TAILQ_FIRST(&addr_tmplist))) { + TAILQ_REMOVE(&addr_tmplist, ifa, ifa_tmplist); + ifafree(ifa); + } + } + + while ((ifp = TAILQ_FIRST(&if_tmplist))) { + TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist); + if_put(ifp); } -out: + rw_exit_write(&if_tmplist_lock); + ifc->ifc_len -= space; return (0); + +free_addrlist: + while((ifa = TAILQ_FIRST(&addr_tmplist))) { + TAILQ_REMOVE(&addr_tmplist, ifa, ifa_tmplist); + ifafree(ifa); + } +free_iflist: + while ((ifp = TAILQ_FIRST(&if_tmplist))) { + TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist); + if_put(ifp); + } + rw_exit_write(&if_tmplist_lock); + + if (error == 0) + ifc->ifc_len -= space; + return (error); } void Index: sys/net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.139 diff -u -p -r1.139 if_var.h --- sys/net/if_var.h 19 Jul 2025 16:40:40 -0000 1.139 +++ sys/net/if_var.h 13 Nov 2025 18:30:36 -0000 @@ -254,6 +254,7 @@ struct ifaddr { struct ifnet *ifa_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifaddr) ifa_list; /* [N] list of addresses for interface */ + TAILQ_ENTRY(ifaddr) ifa_tmplist;/* [T] temporary list */ u_int ifa_flags; /* interface flags, see below */ struct refcnt ifa_refcnt; /* number of `rt_ifa` references */ int ifa_metric; /* cost of going out this interface */