Download raw body.
incpb socket ref count
Hi,
Instead of protecting inp_socket with special mutex when being freed
and reference counting in in_pcbsolock_ref(), better reference count
inp_socket.
When an inpcb is created, inp_socket is initialized with a socket
pointer that is reference counted. When the inpcb is freed, release
the socket reference. As incpb is reference counted itself, we can
always access the socket memory when we have a valid inpcb counter.
in_pcbsolock() can just lock the socket, no reference counting
needed. This reduces contention a bit. As in_pcbdetach() is
protected by socket lock, in_pcbsolock() has to check so_pcb while
holding the lock to skip sockets that are being closed.
ok?
bluhm
Index: kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.468 kern_sysctl.c
--- kern/kern_sysctl.c 9 May 2025 14:53:22 -0000 1.468
+++ kern/kern_sysctl.c 21 May 2025 04:50:15 -0000
@@ -1722,17 +1722,15 @@ do { \
mtx_enter(&(table)->inpt_mtx); \
while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) { \
if (buflen >= elem_size && elem_count > 0) { \
- mtx_enter(&inp->inp_sofree_mtx); \
- so = soref(inp->inp_socket); \
- mtx_leave(&inp->inp_sofree_mtx); \
+ mtx_leave(&(table)->inpt_mtx); \
+ NET_LOCK_SHARED(); \
+ so = in_pcbsolock(inp); \
if (so == NULL) \
continue; \
- mtx_leave(&(table)->inpt_mtx); \
- solock_shared(so); \
fill_file(kf, NULL, NULL, 0, NULL, NULL, p, \
so, show_pointers); \
- sounlock_shared(so); \
- sorele(so); \
+ in_pcbsounlock(inp, so); \
+ NET_UNLOCK_SHARED(); \
error = copyout(kf, dp, outsize); \
mtx_enter(&(table)->inpt_mtx); \
if (error) { \
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.314 in_pcb.c
--- netinet/in_pcb.c 20 May 2025 05:51:43 -0000 1.314
+++ netinet/in_pcb.c 21 May 2025 04:50:15 -0000
@@ -236,8 +236,7 @@ in_pcballoc(struct socket *so, struct in
if (inp == NULL)
return (ENOBUFS);
inp->inp_table = table;
- inp->inp_socket = so;
- mtx_init(&inp->inp_sofree_mtx, IPL_SOFTNET);
+ inp->inp_socket = soref(so);
refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB);
inp->inp_seclevel.sl_auth = IPSEC_AUTH_LEVEL_DEFAULT;
inp->inp_seclevel.sl_esp_trans = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
@@ -584,10 +583,9 @@ in_pcbdetach(struct inpcb *inp)
struct socket *so = inp->inp_socket;
struct inpcbtable *table = inp->inp_table;
+ soassertlocked(so);
+
so->so_pcb = NULL;
- mtx_enter(&inp->inp_sofree_mtx);
- inp->inp_socket = NULL;
- mtx_leave(&inp->inp_sofree_mtx);
/*
* As long as the NET_LOCK() is the default lock for Internet
* sockets, do not release it to not introduce new sleeping
@@ -623,22 +621,17 @@ in_pcbdetach(struct inpcb *inp)
}
struct socket *
-in_pcbsolock_ref(struct inpcb *inp)
+in_pcbsolock(struct inpcb *inp)
{
- struct socket *so;
+ struct socket *so = inp->inp_socket;
NET_ASSERT_LOCKED();
- mtx_enter(&inp->inp_sofree_mtx);
- so = soref(inp->inp_socket);
- mtx_leave(&inp->inp_sofree_mtx);
if (so == NULL)
return NULL;
rw_enter_write(&so->so_lock);
- /* between mutex and rwlock inpcb could be detached */
if (so->so_pcb == NULL) {
rw_exit_write(&so->so_lock);
- sorele(so);
return NULL;
}
KASSERT(inp->inp_socket == so && sotoinpcb(so) == inp);
@@ -646,12 +639,13 @@ in_pcbsolock_ref(struct inpcb *inp)
}
void
-in_pcbsounlock_rele(struct inpcb *inp, struct socket *so)
+in_pcbsounlock(struct inpcb *inp, struct socket *so)
{
if (so == NULL)
return;
+ if (inp != NULL && so->so_pcb != NULL)
+ KASSERT(inp->inp_socket == so && sotoinpcb(so) == inp);
rw_exit_write(&so->so_lock);
- sorele(so);
}
struct inpcb *
@@ -670,6 +664,7 @@ in_pcbunref(struct inpcb *inp)
return;
if (refcnt_rele(&inp->inp_refcnt) == 0)
return;
+ sorele(inp->inp_socket);
KASSERT((LIST_NEXT(inp, inp_hash) == NULL) ||
(LIST_NEXT(inp, inp_hash) == _Q_INVALID));
KASSERT((LIST_NEXT(inp, inp_lhash) == NULL) ||
@@ -819,10 +814,10 @@ in_pcbnotifyall(struct inpcbtable *table
continue;
}
mtx_leave(&table->inpt_mtx);
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
(*notify)(inp, errno);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
mtx_enter(&table->inpt_mtx);
}
mtx_leave(&table->inpt_mtx);
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.168 in_pcb.h
--- netinet/in_pcb.h 20 May 2025 05:51:43 -0000 1.168
+++ netinet/in_pcb.h 21 May 2025 04:51:43 -0000
@@ -81,7 +81,6 @@
* t inpt_mtx pcb table mutex
* L pf_inp_mtx link pf to inp mutex
* s so_lock socket rwlock
- * f inp_sofree_mtx socket detach and lock
*/
/*
@@ -138,8 +137,7 @@ struct inpcb {
#define inp_laddr6 inp_laddru.iau_addr6
u_int16_t inp_fport; /* [t] foreign port */
u_int16_t inp_lport; /* [t] local port */
- struct socket *inp_socket; /* [f] back pointer to socket */
- struct mutex inp_sofree_mtx; /* protect socket free */
+ struct socket *inp_socket; /* [I] back pointer to socket */
caddr_t inp_ppcb; /* pointer to per-protocol pcb */
struct route inp_route; /* [s] cached route */
struct refcnt inp_refcnt; /* refcount PCB, delay memory free */
@@ -311,8 +309,8 @@ int in_pcbaddrisavail(const struct inpc
int in_pcbconnect(struct inpcb *, struct mbuf *);
void in_pcbdetach(struct inpcb *);
struct socket *
- in_pcbsolock_ref(struct inpcb *);
-void in_pcbsounlock_rele(struct inpcb *, struct socket *);
+ in_pcbsolock(struct inpcb *);
+void in_pcbsounlock(struct inpcb *, struct socket *);
struct inpcb *
in_pcbref(struct inpcb *);
void in_pcbunref(struct inpcb *);
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.447 tcp_input.c
--- netinet/tcp_input.c 19 May 2025 02:27:57 -0000 1.447
+++ netinet/tcp_input.c 21 May 2025 04:50:15 -0000
@@ -385,7 +385,7 @@ tcp_input_mlist(struct mbuf_list *ml, in
KASSERT(nxt == IPPROTO_DONE);
}
- in_pcbsounlock_rele(NULL, so);
+ in_pcbsounlock(NULL, so);
}
/*
@@ -655,10 +655,10 @@ findpcb:
*solocked = NULL;
} else {
if (solocked != NULL && *solocked != NULL) {
- in_pcbsounlock_rele(NULL, *solocked);
+ in_pcbsounlock(NULL, *solocked);
*solocked = NULL;
}
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
}
if (so == NULL) {
tcpstat_inc(tcps_noport);
@@ -905,7 +905,7 @@ findpcb:
if (solocked != NULL)
*solocked = so;
else
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1084,7 +1084,7 @@ findpcb:
if (solocked != NULL)
*solocked = so;
else
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1138,7 +1138,7 @@ findpcb:
if (solocked != NULL)
*solocked = so;
else
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1332,7 +1332,7 @@ trimthenstep6:
((arc4random() & 0x7fffffff) | 0x8000);
reuse = &iss;
tp = tcp_close(tp);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
so = NULL;
in_pcbunref(inp);
inp = NULL;
@@ -2141,7 +2141,7 @@ dodata: /* XXX */
if (solocked != NULL)
*solocked = so;
else
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2174,7 +2174,7 @@ dropafterack:
if (solocked != NULL)
*solocked = so;
else
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2210,7 +2210,7 @@ dropwithreset:
(tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now);
}
m_freem(m);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2222,7 +2222,7 @@ drop:
tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen);
m_freem(m);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -3490,7 +3490,7 @@ syn_cache_timer(void *arg)
mtx_leave(&syn_cache_mtx);
NET_LOCK_SHARED();
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL) {
now = tcp_now();
#ifdef TCP_ECN
@@ -3499,7 +3499,7 @@ syn_cache_timer(void *arg)
(void) syn_cache_respond(sc, NULL, now, do_ecn);
tcpstat_inc(tcps_sc_retransmitted);
}
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
NET_UNLOCK_SHARED();
in_pcbunref(inp);
@@ -3622,7 +3622,7 @@ syn_cache_get(struct sockaddr *src, stru
sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid);
if (sc == NULL) {
mtx_leave(&syn_cache_mtx);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
return (NULL);
}
@@ -3636,7 +3636,7 @@ syn_cache_get(struct sockaddr *src, stru
refcnt_take(&sc->sc_refcnt);
mtx_leave(&syn_cache_mtx);
(void) syn_cache_respond(sc, m, now, do_ecn);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
syn_cache_put(sc);
return ((struct socket *)(-1));
}
@@ -3767,7 +3767,7 @@ syn_cache_get(struct sockaddr *src, stru
tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
tp->last_ack_sent = tp->rcv_nxt;
- in_pcbsounlock_rele(listeninp, listenso);
+ in_pcbsounlock(listeninp, listenso);
tcpstat_inc(tcps_sc_completed);
syn_cache_put(sc);
return (so);
@@ -3779,8 +3779,8 @@ abort:
if (tp != NULL)
tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */
m_freem(m);
- in_pcbsounlock_rele(inp, so);
- in_pcbsounlock_rele(listeninp, listenso);
+ in_pcbsounlock(inp, so);
+ in_pcbsounlock(listeninp, listenso);
syn_cache_put(sc);
tcpstat_inc(tcps_sc_aborted);
return ((struct socket *)(-1));
Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.209 tcp_subr.c
--- netinet/tcp_subr.c 2 Mar 2025 21:28:32 -0000 1.209
+++ netinet/tcp_subr.c 21 May 2025 04:50:15 -0000
@@ -692,7 +692,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
return;
}
if (inp != NULL)
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
tp = intotcpcb(inp);
if (tp != NULL) {
@@ -702,7 +702,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *
SEQ_LT(seq, tp->snd_max))
notify(inp, inet6ctlerrmap[cmd]);
}
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
if (tp == NULL &&
@@ -762,7 +762,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport,
rdomain);
if (inp != NULL)
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
tp = intotcpcb(inp);
if (tp != NULL &&
@@ -779,7 +779,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
*/
mtu = (u_int)ntohs(icp->icmp_nextmtu);
if (mtu >= tp->t_pmtud_mtu_sent) {
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return;
}
@@ -800,7 +800,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
*/
if (tp->t_flags & TF_PMTUD_PEND) {
if (SEQ_LT(tp->t_pmtud_th_seq, seq)) {
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return;
}
@@ -810,17 +810,17 @@ tcp_ctlinput(int cmd, struct sockaddr *s
tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return;
}
} else {
/* ignore if we don't have a matching connection */
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
return;
}
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
notify = tcp_mtudisc, ip = NULL;
} else if (cmd == PRC_MTUINC)
@@ -840,7 +840,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport,
rdomain);
if (inp != NULL)
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
tp = intotcpcb(inp);
if (tp != NULL) {
@@ -849,7 +849,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
SEQ_LT(seq, tp->snd_max))
notify(inp, errno);
}
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
if (tp == NULL &&
Index: netinet/tcp_timer.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
diff -u -p -r1.83 tcp_timer.c
--- netinet/tcp_timer.c 12 Feb 2025 21:28:11 -0000 1.83
+++ netinet/tcp_timer.c 21 May 2025 04:50:15 -0000
@@ -91,7 +91,7 @@ tcp_timer_enter(struct inpcb *inp, struc
KASSERT(timer < TCPT_NTIMERS);
NET_LOCK_SHARED();
- *so = in_pcbsolock_ref(inp);
+ *so = in_pcbsolock(inp);
if (*so == NULL) {
*tp = NULL;
return -1;
@@ -109,7 +109,7 @@ tcp_timer_enter(struct inpcb *inp, struc
static inline void
tcp_timer_leave(struct inpcb *inp, struct socket *so)
{
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
NET_UNLOCK_SHARED();
in_pcbunref(inp);
}
@@ -237,7 +237,7 @@ tcp_timer_rexmt(void *arg)
sin.sin_family = AF_INET;
sin.sin_addr = inp->inp_faddr;
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
icmp_mtudisc(&icmp, rtableid);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.247 tcp_usrreq.c
--- netinet/tcp_usrreq.c 20 May 2025 18:41:06 -0000 1.247
+++ netinet/tcp_usrreq.c 21 May 2025 04:50:15 -0000
@@ -1214,7 +1214,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
struct tcpcb *tp = NULL;
if (inp != NULL) {
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
tp = intotcpcb(inp);
}
@@ -1223,7 +1223,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
else
error = ESRCH;
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
NET_UNLOCK_SHARED();
in_pcbunref(inp);
return (error);
@@ -1246,7 +1246,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
}
if (inp != NULL)
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL && ISSET(so->so_state, SS_CONNECTOUT)) {
tir.ruid = so->so_ruid;
@@ -1256,7 +1256,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v
tir.euid = -1;
}
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
NET_UNLOCK_SHARED();
in_pcbunref(inp);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.337 udp_usrreq.c
--- netinet/udp_usrreq.c 12 May 2025 17:21:21 -0000 1.337
+++ netinet/udp_usrreq.c 21 May 2025 04:50:15 -0000
@@ -921,10 +921,10 @@ udp_ctlinput(int cmd, struct sockaddr *s
ip->ip_dst, uhp->uh_dport, ip->ip_src, uhp->uh_sport,
rdomain);
if (inp != NULL)
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
notify(inp, errno);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
in_pcbunref(inp);
} else
in_pcbnotifyall(&udbtable, satosin(sa), rdomain, errno, notify);
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.148 in6_pcb.c
--- netinet6/in6_pcb.c 4 May 2025 23:05:17 -0000 1.148
+++ netinet6/in6_pcb.c 21 May 2025 04:50:15 -0000
@@ -546,10 +546,10 @@ in6_pcbnotify(struct inpcbtable *table,
}
do_notify:
mtx_leave(&table->inpt_mtx);
- so = in_pcbsolock_ref(inp);
+ so = in_pcbsolock(inp);
if (so != NULL)
(*notify)(inp, errno);
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock(inp, so);
mtx_enter(&table->inpt_mtx);
}
mtx_leave(&table->inpt_mtx);
incpb socket ref count