From: Alexander Bluhm Subject: shared netlock for soclose() To: tech@openbsd.org Date: Tue, 3 Jun 2025 14:33:03 +0200 Hi, Currently soclose() needs exclusive netlock. I see deadlocks as tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while holding shared netlock. Running memory allocation in parallel although free needs an exclusive lock is a bad idea. Solution is to run soclose() in parallel. Diff consists of five parts: - always hold reference count of socket in inp_socket - inp_socket is no longer protected by special mutex - account when other CPU sets so_pcb to NULL - run soclose() and sofree() with shared netlock - tcp timeout reaper can go away Please test. I will split the diff for review. bluhm Index: kern/kern_sysctl.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.469 kern_sysctl.c --- kern/kern_sysctl.c 1 Jun 2025 03:43:48 -0000 1.469 +++ kern/kern_sysctl.c 2 Jun 2025 18:32:24 -0000 @@ -1728,17 +1728,15 @@ do { \ mtx_enter(&(table)->inpt_mtx); \ while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) { \ if (buflen >= elem_size && elem_count > 0) { \ - mtx_enter(&inp->inp_sofree_mtx); \ - so = soref(inp->inp_socket); \ - mtx_leave(&inp->inp_sofree_mtx); \ + mtx_leave(&(table)->inpt_mtx); \ + NET_LOCK_SHARED(); \ + so = in_pcbsolock(inp); \ if (so == NULL) \ continue; \ - mtx_leave(&(table)->inpt_mtx); \ - solock_shared(so); \ fill_file(kf, NULL, NULL, 0, NULL, NULL, p, \ so, show_pointers); \ - sounlock_shared(so); \ - sorele(so); \ + in_pcbsounlock(inp, so); \ + NET_UNLOCK_SHARED(); \ error = copyout(kf, dp, outsize); \ mtx_enter(&(table)->inpt_mtx); \ if (error) { \ Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.378 uipc_socket.c --- kern/uipc_socket.c 23 May 2025 23:41:46 -0000 1.378 +++ kern/uipc_socket.c 2 Jun
2025 18:32:24 -0000 @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i if (error) { so->so_state |= SS_NOFDREF; /* sofree() calls sounlock(). */ - soref(so); - sofree(so, 1); - sounlock_shared(so); - sorele(so); + sofree(so, 0); return (error); } sounlock_shared(so); @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock) if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } if (so->so_head) { @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock) */ if (so->so_onq == &head->so_q) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock) } if (!keep_lock) - sounlock(so); + sounlock_shared(so); sorele(so); } @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags) struct socket *so2; int error = 0; - solock(so); + solock_shared(so); /* Revoke async IO early. There is a final revocation in sofree(). */ sigio_free(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { @@ -430,7 +427,7 @@ discard: if (so->so_sp) { struct socket *soback; - sounlock(so); + sounlock_shared(so); /* * Concurrent sounsplice() locks `sb_mtx' mutexes on * both `so_snd' and `so_rcv' before unsplice sockets. 
@@ -477,7 +474,7 @@ notsplicedback: task_del(sosplice_taskq, &so->so_sp->ssp_task); taskq_barrier(sosplice_taskq); - solock(so); + solock_shared(so); } #endif /* SOCKET_SPLICE */ Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.314 in_pcb.c --- netinet/in_pcb.c 20 May 2025 05:51:43 -0000 1.314 +++ netinet/in_pcb.c 2 Jun 2025 18:32:24 -0000 @@ -236,8 +236,7 @@ in_pcballoc(struct socket *so, struct in if (inp == NULL) return (ENOBUFS); inp->inp_table = table; - inp->inp_socket = so; - mtx_init(&inp->inp_sofree_mtx, IPL_SOFTNET); + inp->inp_socket = soref(so); refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB); inp->inp_seclevel.sl_auth = IPSEC_AUTH_LEVEL_DEFAULT; inp->inp_seclevel.sl_esp_trans = IPSEC_ESP_TRANS_LEVEL_DEFAULT; @@ -584,10 +583,9 @@ in_pcbdetach(struct inpcb *inp) struct socket *so = inp->inp_socket; struct inpcbtable *table = inp->inp_table; + soassertlocked(so); + so->so_pcb = NULL; - mtx_enter(&inp->inp_sofree_mtx); - inp->inp_socket = NULL; - mtx_leave(&inp->inp_sofree_mtx); /* * As long as the NET_LOCK() is the default lock for Internet * sockets, do not release it to not introduce new sleeping @@ -623,22 +621,17 @@ in_pcbdetach(struct inpcb *inp) } struct socket * -in_pcbsolock_ref(struct inpcb *inp) +in_pcbsolock(struct inpcb *inp) { - struct socket *so; + struct socket *so = inp->inp_socket; NET_ASSERT_LOCKED(); - mtx_enter(&inp->inp_sofree_mtx); - so = soref(inp->inp_socket); - mtx_leave(&inp->inp_sofree_mtx); if (so == NULL) return NULL; rw_enter_write(&so->so_lock); - /* between mutex and rwlock inpcb could be detached */ if (so->so_pcb == NULL) { rw_exit_write(&so->so_lock); - sorele(so); return NULL; } KASSERT(inp->inp_socket == so && sotoinpcb(so) == inp); @@ -646,12 +639,13 @@ in_pcbsolock_ref(struct inpcb *inp) } void -in_pcbsounlock_rele(struct inpcb *inp, struct socket *so) +in_pcbsounlock(struct inpcb *inp, 
struct socket *so) { if (so == NULL) return; + if (inp != NULL && so->so_pcb != NULL) + KASSERT(inp->inp_socket == so && sotoinpcb(so) == inp); rw_exit_write(&so->so_lock); - sorele(so); } struct inpcb * @@ -670,6 +664,7 @@ in_pcbunref(struct inpcb *inp) return; if (refcnt_rele(&inp->inp_refcnt) == 0) return; + sorele(inp->inp_socket); KASSERT((LIST_NEXT(inp, inp_hash) == NULL) || (LIST_NEXT(inp, inp_hash) == _Q_INVALID)); KASSERT((LIST_NEXT(inp, inp_lhash) == NULL) || @@ -819,10 +814,10 @@ in_pcbnotifyall(struct inpcbtable *table continue; } mtx_leave(&table->inpt_mtx); - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) (*notify)(inp, errno); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); mtx_enter(&table->inpt_mtx); } mtx_leave(&table->inpt_mtx); Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.168 in_pcb.h --- netinet/in_pcb.h 20 May 2025 05:51:43 -0000 1.168 +++ netinet/in_pcb.h 2 Jun 2025 18:32:24 -0000 @@ -81,7 +81,6 @@ * t inpt_mtx pcb table mutex * L pf_inp_mtx link pf to inp mutex * s so_lock socket rwlock - * f inp_sofree_mtx socket detach and lock */ /* @@ -138,9 +137,8 @@ struct inpcb { #define inp_laddr6 inp_laddru.iau_addr6 u_int16_t inp_fport; /* [t] foreign port */ u_int16_t inp_lport; /* [t] local port */ - struct socket *inp_socket; /* [f] back pointer to socket */ - struct mutex inp_sofree_mtx; /* protect socket free */ - caddr_t inp_ppcb; /* pointer to per-protocol pcb */ + struct socket *inp_socket; /* [I] back pointer to socket */ + caddr_t inp_ppcb; /* [s] pointer to per-protocol pcb */ struct route inp_route; /* [s] cached route */ struct refcnt inp_refcnt; /* refcount PCB, delay memory free */ int inp_flags; /* generic IP/datagram flags */ @@ -311,8 +309,8 @@ int in_pcbaddrisavail(const struct inpc int in_pcbconnect(struct inpcb *, struct mbuf *); void in_pcbdetach(struct inpcb *); struct 
socket * - in_pcbsolock_ref(struct inpcb *); -void in_pcbsounlock_rele(struct inpcb *, struct socket *); + in_pcbsolock(struct inpcb *); +void in_pcbsounlock(struct inpcb *, struct socket *); struct inpcb * in_pcbref(struct inpcb *); void in_pcbunref(struct inpcb *); Index: netinet/ip_divert.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v diff -u -p -r1.103 ip_divert.c --- netinet/ip_divert.c 22 May 2025 03:09:00 -0000 1.103 +++ netinet/ip_divert.c 2 Jun 2025 18:32:24 -0000 @@ -190,6 +190,7 @@ void divert_packet(struct mbuf *m, int dir, u_int16_t divert_port) { struct inpcb *inp = NULL; + void *pcb; struct socket *so; struct sockaddr_in sin; @@ -213,6 +214,12 @@ divert_packet(struct mbuf *m, int dir, u divstat_inc(divs_noport); goto bad; } + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + divstat_inc(divs_closing); + goto bad; + } + KASSERT(pcb == inp); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; Index: netinet/ip_divert.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.h,v diff -u -p -r1.26 ip_divert.h --- netinet/ip_divert.h 12 Jul 2024 19:50:35 -0000 1.26 +++ netinet/ip_divert.h 2 Jun 2025 18:32:24 -0000 @@ -22,6 +22,7 @@ struct divstat { u_long divs_ipackets; /* total input packets */ u_long divs_noport; /* no socket on port */ + u_long divs_closing; /* inpcb exists, socket is closing */ u_long divs_fullsock; /* not delivered, input socket full */ u_long divs_opackets; /* total output packets */ u_long divs_errors; /* generic errors */ @@ -49,6 +50,7 @@ struct divstat { enum divstat_counters { divs_ipackets, divs_noport, + divs_closing, divs_fullsock, divs_opackets, divs_errors, Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v diff -u -p -r1.121 ip_var.h --- 
netinet/ip_var.h 2 Mar 2025 21:28:32 -0000 1.121 +++ netinet/ip_var.h 2 Jun 2025 18:32:24 -0000 @@ -68,6 +68,7 @@ struct ipstat { u_long ips_cantforward; /* packets rcvd for unreachable dest */ u_long ips_redirectsent; /* packets forwarded on same net */ u_long ips_noproto; /* unknown or unsupported protocol */ + u_long ips_closing; /* inpcb exists, socket is closing */ u_long ips_delivered; /* datagrams delivered to upper level*/ u_long ips_localout; /* total ip packets generated here */ u_long ips_odropped; /* lost output due to nobufs, etc. */ @@ -116,6 +117,7 @@ enum ipstat_counters { ips_cantforward, /* packets rcvd for unreachable dest */ ips_redirectsent, /* packets forwarded on same net */ ips_noproto, /* unknown or unsupported protocol */ + ips_closing, /* inpcb exists, socket is closing */ ips_delivered, /* datagrams delivered to upper level*/ ips_localout, /* total ip packets generated here */ ips_odropped, /* lost output packets due to nobufs, etc. */ Index: netinet/raw_ip.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v diff -u -p -r1.166 raw_ip.c --- netinet/raw_ip.c 11 Mar 2025 15:31:03 -0000 1.166 +++ netinet/raw_ip.c 2 Jun 2025 18:32:24 -0000 @@ -135,6 +135,7 @@ rip_input(struct mbuf **mp, int *offp, i struct ip *ip = mtod(m, struct ip *); struct inpcb_iterator iter = { .inp_table = NULL }; struct inpcb *inp, *last; + void *pcb; struct in_addr *key; struct sockaddr_in ripsrc; @@ -169,6 +170,12 @@ rip_input(struct mbuf **mp, int *offp, i while ((inp = in_pcb_iterator(&rawcbtable, inp, &iter)) != NULL) { KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + ipstat_inc(ips_closing); + continue; + } + KASSERT(pcb == inp); /* * Packet must not be inserted after disconnected wakeup * call. 
To avoid race, check again when holding receive Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.449 tcp_input.c --- netinet/tcp_input.c 27 May 2025 07:52:49 -0000 1.449 +++ netinet/tcp_input.c 2 Jun 2025 18:32:24 -0000 @@ -385,7 +385,7 @@ tcp_input_mlist(struct mbuf_list *ml, in KASSERT(nxt == IPPROTO_DONE); } - in_pcbsounlock_rele(NULL, so); + in_pcbsounlock(NULL, so); } /* @@ -655,16 +655,15 @@ findpcb: *solocked = NULL; } else { if (solocked != NULL && *solocked != NULL) { - in_pcbsounlock_rele(NULL, *solocked); + in_pcbsounlock(NULL, *solocked); *solocked = NULL; } - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); } if (so == NULL) { - tcpstat_inc(tcps_noport); + tcpstat_inc(tcps_closing); goto dropwithreset_ratelim; } - KASSERT(sotoinpcb(inp->inp_socket) == inp); KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp); soassertlocked(inp->inp_socket); @@ -905,7 +904,7 @@ findpcb: if (solocked != NULL) *solocked = so; else - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1084,7 +1083,7 @@ findpcb: if (solocked != NULL) *solocked = so; else - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1138,7 +1137,7 @@ findpcb: if (solocked != NULL) *solocked = so; else - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1332,7 +1331,7 @@ trimthenstep6: ((arc4random() & 0x7fffffff) | 0x8000); reuse = &iss; tp = tcp_close(tp); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); so = NULL; in_pcbunref(inp); inp = NULL; @@ -2141,7 +2140,7 @@ dodata: /* XXX */ if (solocked != NULL) *solocked = so; else - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2174,7 +2173,7 @@ dropafterack: if (solocked != NULL) *solocked = so; 
else - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2210,7 +2209,7 @@ dropwithreset: (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now); } m_freem(m); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2222,7 +2221,7 @@ drop: tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen); m_freem(m); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -3490,7 +3489,7 @@ syn_cache_timer(void *arg) mtx_leave(&syn_cache_mtx); NET_LOCK_SHARED(); - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) { now = tcp_now(); #ifdef TCP_ECN @@ -3499,7 +3498,7 @@ syn_cache_timer(void *arg) (void) syn_cache_respond(sc, NULL, now, do_ecn); tcpstat_inc(tcps_sc_retransmitted); } - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); NET_UNLOCK_SHARED(); in_pcbunref(inp); @@ -3622,7 +3621,7 @@ syn_cache_get(struct sockaddr *src, stru sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid); if (sc == NULL) { mtx_leave(&syn_cache_mtx); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); return (NULL); } @@ -3636,7 +3635,7 @@ syn_cache_get(struct sockaddr *src, stru refcnt_take(&sc->sc_refcnt); mtx_leave(&syn_cache_mtx); (void) syn_cache_respond(sc, m, now, do_ecn); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); syn_cache_put(sc); return ((struct socket *)(-1)); } @@ -3767,7 +3766,7 @@ syn_cache_get(struct sockaddr *src, stru tp->rcv_adv = tp->rcv_nxt + sc->sc_win; tp->last_ack_sent = tp->rcv_nxt; - in_pcbsounlock_rele(listeninp, listenso); + in_pcbsounlock(listeninp, listenso); tcpstat_inc(tcps_sc_completed); syn_cache_put(sc); return (so); @@ -3779,8 +3778,8 @@ abort: if (tp != NULL) tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */ m_freem(m); - in_pcbsounlock_rele(inp, so); - in_pcbsounlock_rele(listeninp, listenso); + in_pcbsounlock(inp, so); + in_pcbsounlock(listeninp, 
listenso); syn_cache_put(sc); tcpstat_inc(tcps_sc_aborted); return ((struct socket *)(-1)); Index: netinet/tcp_subr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v diff -u -p -r1.210 tcp_subr.c --- netinet/tcp_subr.c 21 May 2025 09:33:49 -0000 1.210 +++ netinet/tcp_subr.c 2 Jun 2025 18:32:24 -0000 @@ -440,8 +440,6 @@ tcp_newtcpcb(struct inpcb *inp, int wait tp->t_inpcb = inp; for (i = 0; i < TCPT_NTIMERS; i++) TCP_TIMER_INIT(tp, i); - timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp, - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); tp->sack_enable = atomic_load_int(&tcp_do_sack); tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ? @@ -528,9 +526,8 @@ tcp_close(struct tcpcb *tp) } m_free(tp->t_template); - /* Free tcpcb after all pending timers have been run. */ - timeout_add(&tp->t_timer_reaper, 0); inp->inp_ppcb = NULL; + pool_put(&tcpcb_pool, tp); soisdisconnected(so); in_pcbdetach(inp); tcpstat_inc(tcps_closed); @@ -692,7 +689,7 @@ tcp6_ctlinput(int cmd, struct sockaddr * return; } if (inp != NULL) - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) tp = intotcpcb(inp); if (tp != NULL) { @@ -702,7 +699,7 @@ tcp6_ctlinput(int cmd, struct sockaddr * SEQ_LT(seq, tp->snd_max)) notify(inp, inet6ctlerrmap[cmd]); } - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); if (tp == NULL && @@ -762,7 +759,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport, rdomain); if (inp != NULL) - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) tp = intotcpcb(inp); if (tp != NULL && @@ -779,7 +776,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s */ mtu = (u_int)ntohs(icp->icmp_nextmtu); if (mtu >= tp->t_pmtud_mtu_sent) { - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return; } @@ -800,7 +797,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s */ if (tp->t_flags & 
TF_PMTUD_PEND) { if (SEQ_LT(tp->t_pmtud_th_seq, seq)) { - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return; } @@ -810,17 +807,17 @@ tcp_ctlinput(int cmd, struct sockaddr *s tp->t_pmtud_nextmtu = icp->icmp_nextmtu; tp->t_pmtud_ip_len = icp->icmp_ip.ip_len; tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl; - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return; } } else { /* ignore if we don't have a matching connection */ - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); return; } - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); notify = tcp_mtudisc, ip = NULL; } else if (cmd == PRC_MTUINC) @@ -840,7 +837,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport, rdomain); if (inp != NULL) - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) tp = intotcpcb(inp); if (tp != NULL) { @@ -849,7 +846,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s SEQ_LT(seq, tp->snd_max)) notify(inp, errno); } - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); if (tp == NULL && Index: netinet/tcp_timer.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v diff -u -p -r1.83 tcp_timer.c --- netinet/tcp_timer.c 12 Feb 2025 21:28:11 -0000 1.83 +++ netinet/tcp_timer.c 2 Jun 2025 18:32:24 -0000 @@ -91,7 +91,7 @@ tcp_timer_enter(struct inpcb *inp, struc KASSERT(timer < TCPT_NTIMERS); NET_LOCK_SHARED(); - *so = in_pcbsolock_ref(inp); + *so = in_pcbsolock(inp); if (*so == NULL) { *tp = NULL; return -1; @@ -109,7 +109,7 @@ tcp_timer_enter(struct inpcb *inp, struc static inline void tcp_timer_leave(struct inpcb *inp, struct socket *so) { - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); NET_UNLOCK_SHARED(); in_pcbunref(inp); } @@ -237,7 +237,7 @@ tcp_timer_rexmt(void *arg) sin.sin_family = AF_INET; sin.sin_addr = 
inp->inp_faddr; - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); icmp_mtudisc(&icmp, rtableid); @@ -540,20 +540,4 @@ tcp_timer_2msl(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); out: tcp_timer_leave(inp, so); -} - -void -tcp_timer_reaper(void *arg) -{ - struct tcpcb *tp = arg; - - /* - * This timer is necessary to delay the pool_put() after all timers - * have finished, even if they were sleeping to grab the net lock. - * Putting the pool_put() in a timer is sufficient as all timers run - * from the same timeout thread. Note that neither softnet thread nor - * user process may access the tcpcb after arming the reaper timer. - * Freeing may run in parallel as it does not grab the net lock. - */ - pool_put(&tcpcb_pool, tp); } Index: netinet/tcp_timer.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v diff -u -p -r1.26 tcp_timer.h --- netinet/tcp_timer.h 16 Jan 2025 11:59:20 -0000 1.26 +++ netinet/tcp_timer.h 2 Jun 2025 18:32:24 -0000 @@ -164,8 +164,5 @@ extern int tcp_keepidle_sec; /* [a] copy extern int tcp_keepintvl_sec; /* [a] copy of above in seconds for sysctl */ extern int tcp_ttl; /* time to live for TCP segs */ extern const int tcp_backoff[]; - -void tcp_timer_reaper(void *); - #endif /* _KERNEL */ #endif /* _NETINET_TCP_TIMER_H_ */ Index: netinet/tcp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v diff -u -p -r1.248 tcp_usrreq.c --- netinet/tcp_usrreq.c 21 May 2025 09:33:49 -0000 1.248 +++ netinet/tcp_usrreq.c 2 Jun 2025 18:32:24 -0000 @@ -1214,7 +1214,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v struct tcpcb *tp = NULL; if (inp != NULL) { - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) tp = intotcpcb(inp); } @@ -1223,7 +1223,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v else error = ESRCH; - 
in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); NET_UNLOCK_SHARED(); in_pcbunref(inp); return (error); @@ -1246,7 +1246,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v } if (inp != NULL) - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL && ISSET(so->so_state, SS_CONNECTOUT)) { tir.ruid = so->so_ruid; @@ -1256,7 +1256,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v tir.euid = -1; } - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); NET_UNLOCK_SHARED(); in_pcbunref(inp); Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.191 tcp_var.h --- netinet/tcp_var.h 7 May 2025 14:10:19 -0000 1.191 +++ netinet/tcp_var.h 2 Jun 2025 18:32:24 -0000 @@ -70,7 +70,6 @@ struct tcpqent { struct tcpcb { struct tcpqehead t_segq; /* sequencing queue */ struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */ - struct timeout t_timer_reaper; /* reaper is special, no refcnt */ short t_state; /* state of this connection */ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ int t_rxtcur; /* current retransmit value */ @@ -393,6 +392,7 @@ struct tcpstat { u_int32_t tcps_pcbhashmiss; /* input packets missing pcb hash */ u_int32_t tcps_noport; /* no socket on port */ + u_int32_t tcps_closing; /* inpcb exists, socket is closing */ u_int32_t tcps_badsyn; /* SYN packet with src==dst rcv'ed */ u_int32_t tcps_dropsyn; /* SYN packet dropped */ @@ -583,6 +583,7 @@ enum tcpstat_counters { tcps_preddat, tcps_pcbhashmiss, tcps_noport, + tcps_closing, tcps_badsyn, tcps_dropsyn, tcps_rcvbadsig, Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.339 udp_usrreq.c --- netinet/udp_usrreq.c 27 May 2025 07:52:49 -0000 1.339 +++ netinet/udp_usrreq.c 2 Jun 2025 18:32:24 -0000 @@ -198,6 +198,7 @@ udp_input(struct mbuf **mp, int *offp, i struct ip *ip = NULL; struct udphdr *uh; struct inpcb *inp = NULL; + void *pcb; struct ip save_ip; int len; u_int16_t savesum; @@ -419,6 +420,12 @@ udp_input(struct mbuf **mp, int *offp, i else KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + udpstat_inc(udps_closing); + continue; + } + KASSERT(pcb == inp); if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE) continue; if (rtable_l2(inp->inp_rtableid) != @@ -596,7 +603,12 @@ udp_input(struct mbuf **mp, int *offp, i return IPPROTO_DONE; } - KASSERT(sotoinpcb(inp->inp_socket) == inp); + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + udpstat_inc(udps_closing); + goto bad; + } + KASSERT(pcb == inp); soassertlocked_readonly(inp->inp_socket); #ifdef INET6 @@ -921,10 +933,10 @@ udp_ctlinput(int cmd, struct sockaddr *s ip->ip_dst, uhp->uh_dport, ip->ip_src, uhp->uh_sport, rdomain); if (inp != NULL) - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) notify(inp, errno); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); in_pcbunref(inp); } 
else in_pcbnotifyall(&udbtable, satosin(sa), rdomain, errno, notify); Index: netinet/udp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_var.h,v diff -u -p -r1.53 udp_var.h --- netinet/udp_var.h 2 Mar 2025 21:28:32 -0000 1.53 +++ netinet/udp_var.h 2 Jun 2025 18:32:24 -0000 @@ -61,6 +61,7 @@ struct udpstat { u_long udps_badlen; /* data length larger than packet */ u_long udps_noport; /* no socket on port */ u_long udps_noportbcast; /* of above, arrived as broadcast */ + u_long udps_closing; /* inpcb exists, socket is closing */ u_long udps_nosec; /* dropped for lack of ipsec */ u_long udps_fullsock; /* not delivered, input socket full */ u_long udps_pcbhashmiss; /* input packets missing pcb hash */ @@ -104,6 +105,7 @@ enum udpstat_counters { udps_badlen, /* data length larger than packet */ udps_noport, /* no socket on port */ udps_noportbcast, /* of above, arrived as broadcast */ + udps_closing, /* inpcb exists, socket is closing */ udps_nosec, /* dropped for lack of ipsec */ udps_fullsock, /* not delivered, input socket full */ udps_pcbhashmiss, /* input packets missing pcb hash */ Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.148 in6_pcb.c --- netinet6/in6_pcb.c 4 May 2025 23:05:17 -0000 1.148 +++ netinet6/in6_pcb.c 2 Jun 2025 18:32:24 -0000 @@ -546,10 +546,10 @@ in6_pcbnotify(struct inpcbtable *table, } do_notify: mtx_leave(&table->inpt_mtx); - so = in_pcbsolock_ref(inp); + so = in_pcbsolock(inp); if (so != NULL) (*notify)(inp, errno); - in_pcbsounlock_rele(inp, so); + in_pcbsounlock(inp, so); mtx_enter(&table->inpt_mtx); } mtx_leave(&table->inpt_mtx); Index: netinet6/ip6_divert.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v diff -u -p -r1.102 ip6_divert.c --- 
netinet6/ip6_divert.c 22 May 2025 03:09:00 -0000 1.102 +++ netinet6/ip6_divert.c 2 Jun 2025 18:32:24 -0000 @@ -199,6 +199,7 @@ void divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port) { struct inpcb *inp = NULL; + void *pcb; struct socket *so; struct sockaddr_in6 sin6; @@ -222,6 +223,12 @@ divert6_packet(struct mbuf *m, int dir, div6stat_inc(div6s_noport); goto bad; } + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + div6stat_inc(div6s_closing); + goto bad; + } + KASSERT(pcb == inp); memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; Index: netinet6/ip6_divert.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.h,v diff -u -p -r1.23 ip6_divert.h --- netinet6/ip6_divert.h 1 Jan 2024 18:52:09 -0000 1.23 +++ netinet6/ip6_divert.h 2 Jun 2025 18:32:24 -0000 @@ -22,6 +22,7 @@ struct div6stat { u_long divs_ipackets; /* total input packets */ u_long divs_noport; /* no socket on port */ + u_long divs_closing; /* inpcb exists, socket is closing */ u_long divs_fullsock; /* not delivered, input socket full */ u_long divs_opackets; /* total output packets */ u_long divs_errors; /* generic errors */ @@ -49,6 +50,7 @@ struct div6stat { enum div6stat_counters { div6s_ipackets, div6s_noport, + div6s_closing, div6s_fullsock, div6s_opackets, div6s_errors, Index: netinet6/raw_ip6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v diff -u -p -r1.192 raw_ip6.c --- netinet6/raw_ip6.c 27 May 2025 07:52:49 -0000 1.192 +++ netinet6/raw_ip6.c 2 Jun 2025 18:32:24 -0000 @@ -138,6 +138,7 @@ rip6_input(struct mbuf **mp, int *offp, struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb_iterator iter = { .inp_table = NULL }; struct inpcb *inp, *last; + void *pcb; struct in6_addr *key; struct sockaddr_in6 rip6src; uint8_t type; @@ -184,6 +185,12 @@ rip6_input(struct mbuf **mp, int *offp, while 
((inp = in_pcb_iterator(&rawin6pcbtable, inp, &iter)) != NULL) { KASSERT(ISSET(inp->inp_flags, INP_IPV6)); + pcb = READ_ONCE(inp->inp_socket->so_pcb); + if (pcb == NULL) { + rip6stat_inc(rip6s_closing); + continue; + } + KASSERT(pcb == inp); /* * Packet must not be inserted after disconnected wakeup * call. To avoid race, check again when holding receive Index: netinet6/raw_ip6.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.h,v diff -u -p -r1.4 raw_ip6.h --- netinet6/raw_ip6.h 9 Feb 2017 15:23:35 -0000 1.4 +++ netinet6/raw_ip6.h 2 Jun 2025 18:32:24 -0000 @@ -42,6 +42,7 @@ struct rip6stat { u_int64_t rip6s_badsum; /* of above, checksum error */ u_int64_t rip6s_nosock; /* no matching socket */ u_int64_t rip6s_nosockmcast; /* of above, arrived as multicast */ + u_int64_t rip6s_closing; /* inpcb exists, socket is closing */ u_int64_t rip6s_fullsock; /* not delivered, input socket full */ u_int64_t rip6s_opackets; /* total output packets */ @@ -68,6 +69,7 @@ enum rip6stat_counters { rip6s_badsum, rip6s_nosock, rip6s_nosockmcast, + rip6s_closing, rip6s_fullsock, rip6s_opackets, rip6s_ncounters,