From: Vitaliy Makkoveev Subject: Re: Unsplice socket before start the teardown To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 16 Feb 2024 11:44:43 +0300 Sorry for this broken diff. I was blindly sure in tcp_detach() behaviour and didn't tested this version. I did the (*pr_usrreq)(), so it's strange that I forgot about tcp(4). This diff could be fixed by moving SS_NOFDREF bit setting after the spliced socket barriers. So if concurrent thread would kill this tcp(4) socket, it only destroys PCB, but left `so' alive. I wanted to move unsplicing from sofree() because the unlocked sblock() should be called before solock(). Since the solock() is held within sofree(), it should be released, so I want to remove re-locking from the stack. With the unlocked sblock() the solock() is missing in the soreceive() path(), so sblock(so->so_rcv) used to serialize with spliced socket paths. This is the fixed diff. The regress/sys/kern/sosplice/ performed without regressions. I'm fine to delay this diff until tcp(4) or udp(4) soreceive() unlocking. Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.320 diff -u -p -r1.320 uipc_socket.c --- sys/kern/uipc_socket.c 12 Feb 2024 22:48:27 -0000 1.320 +++ sys/kern/uipc_socket.c 16 Feb 2024 08:39:43 -0000 @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t void sounsplice(struct socket *, struct socket *, int); void soidle(void *); void sotask(void *); -void soreaper(void *); -void soput(void *); int somove(struct socket *, int); void sorflush(struct socket *); @@ -250,8 +248,6 @@ solisten(struct socket *so, int backlog) return (0); } -#define SOSP_FREEING_READ 1 -#define SOSP_FREEING_WRITE 2 void sofree(struct socket *so, int keep_lock) { @@ -316,36 +312,11 @@ sofree(struct socket *so, int keep_lock) sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_klist); klist_free(&so->so_snd.sb_klist); -#ifdef SOCKET_SPLICE - if (issplicedback(so)) { - int freeing = SOSP_FREEING_WRITE; - - if (so->so_sp->ssp_soback == so) - freeing |= SOSP_FREEING_READ; - sounsplice(so->so_sp->ssp_soback, so, freeing); - } - if (isspliced(so)) { - int freeing = SOSP_FREEING_READ; - - if (so == so->so_sp->ssp_socket) - freeing |= SOSP_FREEING_WRITE; - sounsplice(so, so->so_sp->ssp_socket, freeing); - } -#endif /* SOCKET_SPLICE */ sbrelease(so, &so->so_snd); sorflush(so); if (!keep_lock) sounlock(so); -#ifdef SOCKET_SPLICE - if (so->so_sp) { - /* Reuse splice idle, sounsplice() has been called before. */ - timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so); - timeout_add(&so->so_sp->ssp_idleto, 0); - } else -#endif /* SOCKET_SPLICE */ - { - pool_put(&socket_pool, so); - } + pool_put(&socket_pool, so); } static inline uint64_t @@ -357,6 +328,8 @@ solinger_nsec(struct socket *so) return SEC_TO_NSEC(so->so_linger); } +#define SOSP_FREEING_READ 1 +#define SOSP_FREEING_WRITE 2 /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. @@ -433,9 +406,39 @@ drop: } } discard: +#ifdef SOCKET_SPLICE + if (issplicedback(so)) { + int freeing = SOSP_FREEING_WRITE; + + if (so->so_sp->ssp_soback == so) + freeing |= SOSP_FREEING_READ; + sounsplice(so->so_sp->ssp_soback, so, freeing); + } + if (isspliced(so)) { + int freeing = SOSP_FREEING_READ; + + if (so == so->so_sp->ssp_socket) + freeing |= SOSP_FREEING_WRITE; + sounsplice(so, so->so_sp->ssp_socket, freeing); + } + if (so->so_sp) { + /* + * Safe to unlock. Concurrent soclose() thread + * will destroy PCB but left `so' alive because + * SS_NOFDREF bit is clean. + */ + sounlock(so); + taskq_barrier(sosplice_taskq); + timeout_del_barrier(&so->so_sp->ssp_idleto); + pool_put(&sosplice_pool, so->so_sp); + solock(so); + } +#endif /* SOCKET_SPLICE */ + if (so->so_state & SS_NOFDREF) panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); so->so_state |= SS_NOFDREF; + /* sofree() calls sounlock(). */ sofree(so, 0); return (error); @@ -1441,32 +1444,6 @@ sotask(void *arg) /* Avoid user land starvation. */ yield(); -} - -/* - * The socket splicing task or idle timeout may sleep while grabbing the net - * lock. As sofree() can be called anytime, sotask() or soidle() could access - * the socket memory of a freed socket after wakeup. So delay the pool_put() - * after all pending socket splicing tasks or timeouts have finished. Do this - * by scheduling it on the same threads. - */ -void -soreaper(void *arg) -{ - struct socket *so = arg; - - /* Reuse splice task, sounsplice() has been called before. */ - task_set(&so->so_sp->ssp_task, soput, so); - task_add(sosplice_taskq, &so->so_sp->ssp_task); -} - -void -soput(void *arg) -{ - struct socket *so = arg; - - pool_put(&sosplice_pool, so->so_sp); - pool_put(&socket_pool, so); } /*