Download raw body.
tcp input socket lock
Hi,
In preparation to run TCP input in parallel, we have to lock the
socket. After inpcb lookup, also take the socket lock and increase
the socket reference count. syn_cache_get() unlocks the listen
socket and returns a locked socket. syn_cache_add() relies on
socket lock.
With this diff, exclusive net lock is still held. It might get a
little slower due to the additional mutex and refcount. But I want
to see if locking and refcount works before switching tcp_input()
to shared net lock.
ok?
bluhm
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.434 tcp_input.c
--- netinet/tcp_input.c 10 Mar 2025 15:11:46 -0000 1.434
+++ netinet/tcp_input.c 13 Apr 2025 13:28:28 -0000
@@ -603,6 +603,11 @@ findpcb:
tcpstat_inc(tcps_noport);
goto dropwithreset_ratelim;
}
+ so = in_pcbsolock_ref(inp);
+ if (so == NULL) {
+ tcpstat_inc(tcps_noport);
+ goto dropwithreset_ratelim;
+ }
KASSERT(sotoinpcb(inp->inp_socket) == inp);
KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
@@ -635,7 +640,6 @@ findpcb:
else
tiwin = th->th_win;
- so = inp->inp_socket;
if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
union syn_cache_sa src;
union syn_cache_sa dst;
@@ -724,6 +728,7 @@ findpcb:
* in use for the reply,
* do not free it.
*/
+ so = NULL;
m = *mp = NULL;
goto drop;
} else {
@@ -731,13 +736,11 @@ findpcb:
* We have created a
* full-blown connection.
*/
- tp = NULL;
in_pcbunref(inp);
inp = in_pcbref(sotoinpcb(so));
tp = intotcpcb(inp);
if (tp == NULL)
goto badsyn; /*XXX*/
-
}
break;
@@ -843,6 +846,7 @@ findpcb:
tcpstat_inc(tcps_dropsyn);
goto drop;
}
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1018,6 +1022,7 @@ findpcb:
if (so->so_snd.sb_cc ||
tp->t_flags & TF_NEEDOUTPUT)
(void) tcp_output(tp);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1068,6 +1073,7 @@ findpcb:
tp->t_flags &= ~TF_BLOCKOUTPUT;
if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
(void) tcp_output(tp);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -1261,6 +1267,8 @@ trimthenstep6:
((arc4random() & 0x7fffffff) | 0x8000);
reuse = &iss;
tp = tcp_close(tp);
+ in_pcbsounlock_rele(inp, so);
+ so = NULL;
in_pcbunref(inp);
inp = NULL;
goto findpcb;
@@ -2065,6 +2073,7 @@ dodata: /* XXX */
*/
if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
(void) tcp_output(tp);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2094,6 +2103,7 @@ dropafterack:
m_freem(m);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2129,6 +2139,7 @@ dropwithreset:
(tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now);
}
m_freem(m);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
@@ -2140,6 +2151,7 @@ drop:
tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen);
m_freem(m);
+ in_pcbsounlock_rele(inp, so);
in_pcbunref(inp);
return IPPROTO_DONE;
}
@@ -3543,6 +3555,7 @@ syn_cache_get(struct sockaddr *src, stru
sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid);
if (sc == NULL) {
mtx_leave(&syn_cache_mtx);
+ in_pcbsounlock_rele(inp, so);
return (NULL);
}
@@ -3556,6 +3569,7 @@ syn_cache_get(struct sockaddr *src, stru
refcnt_take(&sc->sc_refcnt);
mtx_leave(&syn_cache_mtx);
(void) syn_cache_respond(sc, m, now, do_ecn);
+ in_pcbsounlock_rele(inp, so);
syn_cache_put(sc);
return ((struct socket *)(-1));
}
@@ -3696,7 +3710,7 @@ syn_cache_get(struct sockaddr *src, stru
tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
tp->last_ack_sent = tp->rcv_nxt;
- in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock_rele(listeninp, listenso);
tcpstat_inc(tcps_sc_completed);
syn_cache_put(sc);
return (so);
@@ -3709,6 +3723,7 @@ abort:
tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */
m_freem(m);
in_pcbsounlock_rele(inp, so);
+ in_pcbsounlock_rele(listeninp, listenso);
syn_cache_put(sc);
tcpstat_inc(tcps_sc_aborted);
return ((struct socket *)(-1));
@@ -3813,7 +3828,7 @@ syn_cache_add(struct sockaddr *src, stru
struct mbuf *ipopts;
struct rtentry *rt = NULL;
- NET_ASSERT_LOCKED();
+ soassertlocked(so);
tp = sototcpcb(so);
@@ -3989,9 +4004,8 @@ syn_cache_add(struct sockaddr *src, stru
if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
mtx_enter(&syn_cache_mtx);
/*
- * XXXSMP Currently exclusive netlock prevents another insert
- * after our syn_cache_lookup() and before syn_cache_insert().
- * Double insert should be handled and not rely on netlock.
+ * Socket lock prevents another insert after our
+ * syn_cache_lookup() and before syn_cache_insert().
*/
syn_cache_insert(sc, tp);
mtx_leave(&syn_cache_mtx);
tcp input socket lock