Download raw body.
Kill `inp_notify' list remains
This was the list, where PCBs were temporary linked to avoid sleep with
`inpt_mtx' mutex(9) held. in_pcbnotifyall() and in6_pcbnotify are the
last list users, so I propose to switch them to in_pcb_iterator() too,
moreover they already do in_pcb_is_iterator() check.
Note, in_pcb_iterator() does in_pcbref() and in_pcbunref() for
corresponding `inp', unlocked dereference is safe and no extra
reference release required.
Outside divert(4) sockets search loop, sysctl_file() is the only
`inp_queue' foreach loops runner. It takes exclusive netlock and holds
it while processing all inet PCB tables. I want to switch these loops to
the iterators and push shared solock into `inp_queue' loops.
From this:
NET_LOCK();
mtx_enter(&tcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
FILLSO(inp->inp_socket);
mtx_leave(&tcbtable.inpt_mtx);
to this:
mtx_enter(&tcbtable.inpt_mtx);
while ((inp = in_pcb_iterator(&tcbtable, inp, &iter)) != NULL) {
mtx_leave(&tcbtable.inpt_mtx);
NET_LOCK_SHARED();
if (inp->inp_socket)
FILLSO(inp->inp_socket);
NET_UNLOCK_SHARED();
mtx_enter(&tcbtable.inpt_mtx);
}
mtx_leave(&tcbtable.inpt_mtx);
In the future, net lock could be replaced with solock() and pushed down
to FILLSO(), so copyout() could be moved outside net lock too.
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.305 in_pcb.c
--- sys/netinet/in_pcb.c 5 Nov 2024 22:44:20 -0000 1.305
+++ sys/netinet/in_pcb.c 20 Dec 2024 01:01:19 -0000
@@ -179,7 +179,6 @@ void
in_pcbinit(struct inpcbtable *table, int hashsize)
{
mtx_init(&table->inpt_mtx, IPL_SOFTNET);
- rw_init(&table->inpt_notify, "inpnotify");
TAILQ_INIT(&table->inpt_queue);
table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK,
&table->inpt_mask);
@@ -763,8 +762,8 @@ void
in_pcbnotifyall(struct inpcbtable *table, const struct sockaddr_in *dst,
u_int rtable, int errno, void (*notify)(struct inpcb *, int))
{
- SIMPLEQ_HEAD(, inpcb) inpcblist;
- struct inpcb *inp;
+ struct inpcb_iterator iter = { .inp_table = NULL };
+ struct inpcb *inp = NULL;
u_int rdomain;
if (dst->sin_addr.s_addr == INADDR_ANY)
@@ -772,39 +771,20 @@ in_pcbnotifyall(struct inpcbtable *table
if (notify == NULL)
return;
- /*
- * Use a temporary notify list protected by rwlock to run over
- * selected PCB. This is necessary as the list of all PCB is
- * protected by a mutex. Notify may call ip_output() eventually
- * which may sleep as pf lock is a rwlock. Also the SRP
- * implementation of the routing table might sleep.
- * The same inp_notify list entry and inpt_notify rwlock are
- * used for UDP multicast and raw IP delivery.
- */
- SIMPLEQ_INIT(&inpcblist);
rdomain = rtable_l2(rtable);
- rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
- TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
- if (in_pcb_is_iterator(inp))
- continue;
+ while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) {
KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
rtable_l2(inp->inp_rtableid) != rdomain) {
continue;
}
- in_pcbref(inp);
- SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
- }
- mtx_leave(&table->inpt_mtx);
-
- while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
- SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
+ mtx_leave(&table->inpt_mtx);
(*notify)(inp, errno);
- in_pcbunref(inp);
+ mtx_enter(&table->inpt_mtx);
}
- rw_exit_write(&table->inpt_notify);
+ mtx_leave(&table->inpt_mtx);
}
/*
Index: sys/netinet/in_pcb.h
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.159 in_pcb.h
--- sys/netinet/in_pcb.h 5 Nov 2024 10:49:23 -0000 1.159
+++ sys/netinet/in_pcb.h 20 Dec 2024 01:01:19 -0000
@@ -79,7 +79,6 @@
* I immutable after creation
* N net lock
* t inpt_mtx pcb table mutex
- * y inpt_notify pcb table rwlock for notify
* L pf_inp_mtx link pf to inp mutex
* s so_lock socket rwlock
*/
@@ -128,7 +127,6 @@ struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */
LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */
TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */
- SIMPLEQ_ENTRY(inpcb) inp_notify; /* [y] notify or udp append */
struct inpcbtable *inp_table; /* [I] inet queue/hash table */
union inpaddru inp_faddru; /* [t] Foreign address. */
union inpaddru inp_laddru; /* [t] Local address. */
@@ -194,7 +192,6 @@ in_pcb_is_iterator(struct inpcb *inp)
struct inpcbtable {
struct mutex inpt_mtx; /* protect queue and hash */
- struct rwlock inpt_notify; /* protect inp_notify list */
TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */
struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */
Index: sys/netinet6/in6_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.145 in6_pcb.c
--- sys/netinet6/in6_pcb.c 5 Nov 2024 10:49:23 -0000 1.145
+++ sys/netinet6/in6_pcb.c 20 Dec 2024 01:01:19 -0000
@@ -427,8 +427,8 @@ in6_pcbnotify(struct inpcbtable *table,
uint fport_arg, const struct sockaddr_in6 *src, uint lport_arg,
u_int rtable, int cmd, void *cmdarg, void (*notify)(struct inpcb *, int))
{
- SIMPLEQ_HEAD(, inpcb) inpcblist;
- struct inpcb *inp;
+ struct inpcb_iterator iter = { .inp_table = NULL };
+ struct inpcb *inp = NULL;
u_short fport = fport_arg, lport = lport_arg;
struct sockaddr_in6 sa6_src;
int errno;
@@ -474,13 +474,9 @@ in6_pcbnotify(struct inpcbtable *table,
if (notify == NULL)
return;
- SIMPLEQ_INIT(&inpcblist);
rdomain = rtable_l2(rtable);
- rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
- TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
- if (in_pcb_is_iterator(inp))
- continue;
+ while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) {
KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*
@@ -546,17 +542,11 @@ in6_pcbnotify(struct inpcbtable *table,
continue;
}
do_notify:
- in_pcbref(inp);
- SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
- }
- mtx_leave(&table->inpt_mtx);
-
- while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
- SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
+ mtx_leave(&table->inpt_mtx);
(*notify)(inp, errno);
- in_pcbunref(inp);
+ mtx_enter(&table->inpt_mtx);
}
- rw_exit_write(&table->inpt_notify);
+ mtx_leave(&table->inpt_mtx);
}
struct rtentry *
Kill `inp_notify' list remains