Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: mbuf cluster m_extref_mtx contention
To:
David Gwynne <david@gwynne.id.au>
Cc:
tech@openbsd.org
Date:
Fri, 24 Apr 2026 16:43:03 +0200

Download raw body.

Thread
  • Alexander Bluhm:

    mbuf cluster m_extref_mtx contention

  • On Wed, Apr 22, 2026 at 04:21:42PM +1000, David Gwynne wrote:
    > from another perspective, if the use after free is the result of
    > mishandling mbufs, then the other diff is more forgiving of buggy
    > behaviour.
    
    I still see stability problems with the extfree proxy diff.  It
    only happens when creating kstack flamegraphs with btrace.
    Crash is not reliable, needs several test runs.  But I don't see
    crashes without the diff.  Trying to reproduce and collect more
    information.
    
    Stopped at      db_enter+0x14:  popq    %rbp
        TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
     365692  20485      0    0x100000          0   12  splicebench
     158520  39646      0    0x100000          0   13  splicebench
     456824  46846      0    0x100000          0    9  splicebench
      22375   7927      0    0x100000          0   11  splicebench
     434104  44068      0    0x100000          0    2  splicebench
     378795  93654      0    0x100000          0    7  splicebench
     333691  25747      0    0x100000          0    5  splicebench
     144907  64236      0    0x100000          0    0  splicebench
     490277  24953      0     0x14000      0x200    3  softnet7
     111587  91554      0     0x14000      0x200   14  softnet6
     405206  59298      0     0x14000      0x200    8  softnet5
     169965  54014      0     0x14000      0x200    4  softnet4
      33162  42840      0     0x14000      0x200   10  softnet3
     462975  60233      0     0x14000      0x200    6  softnet2
     275559  33456      0     0x14000      0x200    1  softnet1
     140656  14600      0     0x14000      0x200   15  softnet0
    db_enter() at db_enter+0x14
    panic(ffffffff826734e6) at panic+0xd5
    __assert(ffffffff826ac5d9,ffffffff8264d8c6,b5,ffffffff82669d25) at __assert+0x29
    dal_irq_service_dcn302_create(ffff800002ee0f00) at dal_irq_service_dcn302_create
    clockintr_dispatch(ffff8000639e3b00) at clockintr_dispatch+0x259
    lapic_clockintr(0,0) at lapic_clockintr+0x43
    Xresume_lapic_ltimer() at Xresume_lapic_ltimer+0x2a
    acpicpu_idle() at acpicpu_idle+0x131
    sched_idle(ffff80005a6a7ff0) at sched_idle+0x24b
    end trace frame: 0x0, count: 6
    https://www.openbsd.org/ddb.html describes the minimum info required in bug
    reports.  Insufficient info makes it difficult to find and fix bugs.
    
    ddb{22}> show panic
     cpu0: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
    *cpu1: uvm_fault(0xffffffff82bb9060, 0x2026734c0, 0, 1) -> e
     cpu2: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu3: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu4: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu5: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu6: kernel diagnostic assertion "sb->sb_mb == sb->sb_lastrecord" failed: file "/usr/src/sys/kern/uipc_socket2.c", line 831
     cpu7: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu8: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu9: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu10: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu11: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu12: receive 1: so 0xffff800003c26130, so_type 1, sb_cc 2896
     cpu13: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu14: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu15: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu16: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu17: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu18: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu19: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu20: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu21: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu22: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu23: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu24: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
     cpu25: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu26: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu27: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu28: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu29: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu30: kernel diagnostic assertion "(sih->sih_state & SIS_RESTART) == 0" failed: file "/usr/src/sys/kern/kern_softintr.c", line 181
     cpu31: kernel diagnostic assertion "(sih->sih_state & (SIS_PENDING | SIS_RESTART)) == SIS_PENDING" failed: file "/usr/src/sys/kern/kern_softintr.c", line 71
    
    ddb{1}> trace
    x86_ipi_db(ffff80005a5eaff0) at x86_ipi_db+0x16
    x86_ipi_handler() at x86_ipi_handler+0x80
    Xresume_lapic_ipi() at Xresume_lapic_ipi+0x27
    x86_bus_space_io_read_1(3f8,5) at x86_bus_space_io_read_1+0x19
    comcnputc(800,20) at comcnputc+0x7f
    cnputc(20) at cnputc+0x47
    db_putchar(70) at db_putchar+0x42a
    kprintf() at kprintf+0x10b
    db_printf(ffffffff8267d611) at db_printf+0x6d
    panic(ffffffff826734e6) at panic+0xf4
    __assert(ffffffff826ac5d9,ffffffff8264d8c6,b5,ffffffff82669d25) at __assert+0x29
    dal_irq_service_dcn302_create(ffff800002ee0f00) at dal_irq_service_dcn302_create
    clockintr_dispatch(ffff800063928ad0) at clockintr_dispatch+0x259
    lapic_clockintr(0,0) at lapic_clockintr+0x43
    Xresume_lapic_ltimer() at Xresume_lapic_ltimer+0x2a
    x86_bus_space_mem_read_4(ffff80005a5e2000,f0) at x86_bus_space_mem_read_4+0x18
    acpihpet_delay(1) at acpihpet_delay+0x42
    comcnputc(800,20) at comcnputc+0xa2
    cnputc(20) at cnputc+0x47
    db_putchar(30) at db_putchar+0x42a
    kprintf() at kprintf+0x12ff
    db_printf(ffffffff82615490) at db_printf+0x6d
    fault(ffffffff826d1a49) at fault+0x97
    kpageflttrap(ffff800063928f50,2026734c0) at kpageflttrap+0x1aa
    kerntrap(ffff800063928f50) at kerntrap+0xcf
    alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
    refcnt_rele(fffffd8e5756e690) at refcnt_rele+0x78
    m_extfree_refs(fffffd800b1a8000,800,fffffd8e5756e680) at m_extfree_refs+0x31
    m_free(fffffd8085078600) at m_free+0x170
    sbdrop(ffff800003c275f0,10f8) at sbdrop+0x8b
    tcp_input_solocked(ffff800063929300,ffff80006392930c,fffffd808ab15c00,2,ffff8000639292f8) at tcp_input_solocked+0x2362
    tcp_input_mlist(ffffffff82bafad0,2) at tcp_input_mlist+0x7f
    if_input_process(ffff80000039c048,ffff8000639293d8,1) at if_input_process+0x272
    ifiq_process(ffff800002c86800) at ifiq_process+0xa1
    taskq_thread(ffff80000002c080) at taskq_thread+0x129
    end trace frame: 0x0, count: -35
    
    ddb{1}> ps
       PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
     58050  361454  71109      0  2        0x82                btrace
     20485  365692  87224      0  7    0x100000                splicebench
     39646  158520  87224      0  7    0x100000                splicebench
     46846  456824  87224      0  7    0x100000                splicebench
     15574  294431  87224      0  2    0x100000                splicebench
      7927   22375  87224      0  7    0x100000                splicebench
     93328  507813  87224      0  2    0x100000                splicebench
     58465  519843  87224      0  2    0x100000                splicebench
     44068  434104  87224      0  7    0x100000                splicebench
     93654  378795  87224      0  7    0x100000                splicebench
     31731  370468  87224      0  2    0x100000                splicebench
     86434  103344  87224      0  2    0x100000                splicebench
     27510   81545  87224      0  2    0x100000                splicebench
     25747  333691  87224      0  7    0x100000                splicebench
     53981  314709  87224      0  3    0x100000  inet          splicebench
     64236  144907  87224      0  7    0x100000                splicebench
     71109  170280  89841      0  3        0x80  nanoslp       perl
     74634  405119  89841      0  3    0x10008a  kqread        ssh
     87224   21255  89841      0  3    0x100082  kqread        splicebench
     89841  303793  17337      0  3        0x82  piperd        perl
     17337  318736   5426      0  3    0x10008a  sigsusp       ksh
      5426   12123  28275      0  3        0x98  kqread        sshd-session
     28275  273225  70088      0  3        0x92  kqread        sshd-session
     50805   41303      1      0  3    0x100083  ttyin         getty
     24529   87335      1      0  3    0x100083  ttyin         getty
     80654  510830      1      0  3    0x100083  ttyin         getty
     12695  375538      1      0  3    0x100083  ttyin         getty
     79672  176039      1      0  3    0x100083  ttyin         getty
     36233  407260      1      0  3    0x100083  ttyin         getty
     68084  517278      1      0  3    0x100098  kqread        cron
     99602  246314      1     99  3   0x1100090  kqread        sndiod
      7947  118625      1    110  3    0x100090  kqread        sndiod
     83398  144268  65376     95  3   0x1100092  kqread        smtpd
     95991  338364  65376    103  3   0x1100092  kqread        smtpd
     38757  437470  65376     95  3   0x1100092  kqread        smtpd
     77933  309763  65376     95  3    0x100092  kqread        smtpd
     66827   14324  65376     95  3   0x1100092  kqread        smtpd
     68459  268927  65376     95  3   0x1100092  kqread        smtpd
     65376   74460      1      0  3    0x100080  kqread        smtpd
     91114  461520  93636     91  3        0x92  kqread        snmpd_metrics
     65876  362231  93636     91  3   0x1100092  kqread        snmpd
     93636   21862      1      0  3    0x100080  kqread        snmpd
     70088  185108      1      0  3        0x88  kqread        sshd
     52864   91164      0      0  3     0x14200  acct          acct
     50250  288863      0      0  3     0x14280  nfsidl        nfsio
     50176  209335      0      0  3     0x14280  nfsidl        nfsio
     94684  377523      0      0  3     0x14280  nfsidl        nfsio
     65946  508317      0      0  3     0x14280  nfsidl        nfsio
     50420  155428      1      0  3    0x100080  kqread        ntpd
     84238  472656  53678     83  3    0x100092  kqread        ntpd
     53678  368489      1     83  3   0x1100092  kqread        ntpd
     75650  368350  36295     74  3   0x1100092  bpf           pflogd
     36295  256904      1      0  3        0x80  sbwait        pflogd
     78139  297104  54691     73  3   0x1100090  kqread        syslogd
     54691  498524      1      0  3    0x100082  sbwait        syslogd
     44429   76567      1      0  3    0x100080  kqread        resolvd
     20709   51832  38604     77  3    0x100092  kqread        dhcpleased
     84502   56883  38604     77  3    0x100092  kqread        dhcpleased
     38604  171529      1      0  3        0x80  kqread        dhcpleased
     80043  500292  30287    115  3    0x100092  kqread        slaacd
     25595  354301  30287    115  3    0x100092  kqread        slaacd
     30287   36766      1      0  3    0x100080  kqread        slaacd
     58000   92819      0      0  3     0x14200  bored         smr
     52409  291605      0      0  3     0x14200  pgzero        zerothread
     41314  354360      0      0  3     0x14200  aiodoned      aiodoned
     54456  343610      0      0  3     0x14200  syncer        update
     67784  394946      0      0  3     0x14200  cleaner       cleaner
     19316  295810      0      0  3     0x14200  reaper        reaper
     60601  425075      0      0  3     0x14200  pgdaemon      pagedaemon
      1544  483302      0      0  3     0x14200  bored         wsdisplay0
     65239  166868      0      0  3     0x14200  usbtsk        usbtask
      1101  215935      0      0  3     0x14200  usbatsk       usbatsk
     42350  400898      0      0  3  0x40014200  acpi0         acpi0
     41669  310274      0      0  7  0x40014200                idle31
     91470  404675      0      0  7  0x40014200                idle30
     95989   90446      0      0  7  0x40014200                idle29
     70351  154384      0      0  7  0x40014200                idle28
     28814  502486      0      0  7  0x40014200                idle27
     48633   69788      0      0  7  0x40014200                idle26
     14401  390864      0      0  7  0x40014200                idle25
     19695  330551      0      0  7  0x40014200                idle24
     55843  470262      0      0  7  0x40014200                idle23
     80609  404760      0      0  7  0x40014200                idle22
     77537   71178      0      0  7  0x40014200                idle21
     51805   37231      0      0  7  0x40014200                idle20
     50261   72855      0      0  7  0x40014200                idle19
     82735  412108      0      0  7  0x40014200                idle18
     42744  363135      0      0  7  0x40014200                idle17
     27828   50260      0      0  7  0x40014200                idle16
      8054   46800      0      0  3  0x40014200                idle15
     76185  398551      0      0  3  0x40014200                idle14
     75077  113551      0      0  3  0x40014200                idle13
     20685  452403      0      0  3  0x40014200                idle12
      7685  492379      0      0  3  0x40014200                idle11
     49316  197614      0      0  3  0x40014200                idle10
     10436  303108      0      0  3  0x40014200                idle9
     60385  172713      0      0  3  0x40014200                idle8
     82749  152815      0      0  3  0x40014200                idle7
     98174  487337      0      0  3  0x40014200                idle6
     30446   79273      0      0  3  0x40014200                idle5
     87509   11375      0      0  3  0x40014200                idle4
     65756   63368      0      0  3  0x40014200                idle3
     99429  430206      0      0  3  0x40014200                idle2
     97717  337576      0      0  3  0x40014200                idle1
     92097   96322      0      0  3     0x14200  bored         sensors
     24953  490277      0      0  7     0x14200                softnet7
     91554  111587      0      0  7     0x14200                softnet6
     59298  405206      0      0  7     0x14200                softnet5
     54014  169965      0      0  7     0x14200                softnet4
     42840   33162      0      0  7     0x14200                softnet3
     60233  462975      0      0  7     0x14200                softnet2
    *33456  275559      0      0  7     0x14200                softnet1
     14600  140656      0      0  7     0x14200                softnet0
     94996  376490      0      0  3     0x14200  bored         systqmp
     87967   81803      0      0  3     0x14200  bored         systq
     91939  502639      0      0  3     0x14200  tmoslp        softclockmp
     67768  365183      0      0  3  0x40014200  tmoslp        softclock
     28080   21713      0      0  3  0x40014200                idle0
         1   17636      0      0  3        0x82  wait          init
         0       0     -1      0  3     0x10200  scheduler     swapper
    
    ddb{1}> show register
    rdi               0xffff80005a5eaff0
    rsi                                0
    rbp               0xffff8000639285d0
    rbx               0xffffffff82aaf738    ipifunc+0x38
    rdx                                0
    rcx                              0x7
    rax                       0xffffff7f
    r8                                 0
    r9                                 0
    r10                                0
    r11               0x73176215a49ffd46
    r12                              0x7
    r13                              0xc
    r14               0xffff80005a5eaff0
    r15                                0
    rip               0xffffffff81ee5bd6    x86_ipi_db+0x16
    cs                               0x8
    rflags                         0x202
    rsp               0xffff8000639285c0
    ss                              0x10
    x86_ipi_db+0x16:        leave
    
    bluhm
    
    > Index: kern/uipc_mbuf.c
    > ===================================================================
    > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
    > diff -u -p -r1.304 uipc_mbuf.c
    > --- kern/uipc_mbuf.c	5 Feb 2026 03:26:00 -0000	1.304
    > +++ kern/uipc_mbuf.c	22 Apr 2026 06:04:19 -0000
    > @@ -123,9 +123,21 @@ int max_linkhdr;		/* largest link-level 
    >  int max_protohdr;		/* largest protocol header */
    >  int max_hdr;			/* largest link+protocol header */
    >  
    > -struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);
    > +struct m_ext_refs {
    > +	void		*arg;
    > +	u_int		 free_fn;
    > +	u_int		 zero;
    > +	struct refcnt	 refs;
    > +};
    > +
    > +static struct pool	m_ext_refs_pool;
    > +
    > +static void		m_extfree_refs(caddr_t, u_int, void *);
    > +u_int			m_extfree_refs_fn;
    > +
    > +static int		m_extref(struct mbuf *, struct mbuf *, int);
    > +static void		m_extfree(struct mbuf *);
    >  
    > -void	m_extfree(struct mbuf *);
    >  void	m_zero(struct mbuf *);
    >  
    >  unsigned long mbuf_mem_limit;	/* [a] how much memory can be allocated */
    > @@ -174,6 +186,8 @@ mbinit(void)
    >  
    >  	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
    >  	    IPL_NET, 0, "mtagpl", NULL);
    > +	pool_init(&m_ext_refs_pool, sizeof(struct m_ext_refs), CACHELINESIZE,
    > +	    IPL_NET, 0, "mextrefs", NULL);
    >  
    >  	for (i = 0; i < nitems(mclsizes); i++) {
    >  		lowbits = mclsizes[i] & ((1 << 10) - 1);
    > @@ -193,6 +207,7 @@ mbinit(void)
    >  
    >  	(void)mextfree_register(m_extfree_pool);
    >  	KASSERT(num_extfree_fns == 1);
    > +	m_extfree_refs_fn = mextfree_register(m_extfree_refs);
    >  }
    >  
    >  void
    > @@ -204,6 +219,7 @@ mbcpuinit(void)
    >  
    >  	pool_cache_init(&mbpool);
    >  	pool_cache_init(&mtagpool);
    > +	pool_cache_init(&m_ext_refs_pool);
    >  
    >  	for (i = 0; i < nitems(mclsizes); i++)
    >  		pool_cache_init(&mclpools[i]);
    > @@ -399,6 +415,32 @@ m_extfree_pool(caddr_t buf, u_int size, 
    >  	pool_put(pp, buf);
    >  }
    >  
    > +int
    > +m_ext_refs_shared(struct mbuf *m)
    > +{
    > +	struct m_ext_refs *mrefs = m->m_ext.ext_arg;
    > +
    > +	return (refcnt_shared(&mrefs->refs));
    > +}
    > +
    > +static void
    > +m_extfree_refs(caddr_t buf, u_int size, void *arg)
    > +{
    > +	struct m_ext_refs *mrefs = arg;
    > +
    > +	if (refcnt_rele(&mrefs->refs)) {
    > +		if (mrefs->zero)
    > +			explicit_bzero(buf, size);
    > +
    > +		KASSERT(mrefs->free_fn < num_extfree_fns);
    > +		KASSERT(mrefs->free_fn != m_extfree_refs_fn);
    > +
    > +		mextfree_fns[mrefs->free_fn](buf, size, mrefs->arg);
    > +
    > +		pool_put(&m_ext_refs_pool, mrefs);
    > +	}
    > +}
    > +
    >  struct mbuf *
    >  m_free(struct mbuf *m)
    >  {
    > @@ -434,44 +476,33 @@ m_free(struct mbuf *m)
    >  	return (n);
    >  }
    >  
    > -void
    > -m_extref(struct mbuf *o, struct mbuf *n)
    > +static int
    > +m_extref(struct mbuf *m, struct mbuf *n, int how)
    >  {
    > -	int refs = MCLISREFERENCED(o);
    > +	struct m_ext_refs *mrefs;
    >  
    > -	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);
    > +	if (m->m_ext.ext_free_fn == m_extfree_refs_fn)
    > +		mrefs = m->m_ext.ext_arg;
    > +	else {
    > +		mrefs = pool_get(&m_ext_refs_pool, how);
    > +		if (mrefs == NULL)
    > +			return (ENOMEM);
    >  
    > -	if (refs)
    > -		mtx_enter(&m_extref_mtx);
    > -	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
    > -	n->m_ext.ext_prevref = o;
    > -	o->m_ext.ext_nextref = n;
    > -	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
    > -	if (refs)
    > -		mtx_leave(&m_extref_mtx);
    > -
    > -	MCLREFDEBUGN((n), __FILE__, __LINE__);
    > -}
    > +		refcnt_init(&mrefs->refs);
    > +		mrefs->arg = m->m_ext.ext_arg;
    > +		mrefs->free_fn = m->m_ext.ext_free_fn;
    > +		mrefs->zero = 0;
    >  
    > -static inline u_int
    > -m_extunref(struct mbuf *m)
    > -{
    > -	int refs = 0;
    > +		m->m_ext.ext_arg = mrefs;
    > +		m->m_ext.ext_free_fn = m_extfree_refs_fn;
    > +	}
    >  
    > -	if (!MCLISREFERENCED(m))
    > -		return (0);
    > +	refcnt_take(&mrefs->refs);
    >  
    > -	mtx_enter(&m_extref_mtx);
    > -	if (MCLISREFERENCED(m)) {
    > -		m->m_ext.ext_nextref->m_ext.ext_prevref =
    > -		    m->m_ext.ext_prevref;
    > -		m->m_ext.ext_prevref->m_ext.ext_nextref =
    > -		    m->m_ext.ext_nextref;
    > -		refs = 1;
    > -	}
    > -	mtx_leave(&m_extref_mtx);
    > +	MEXTADD(n, m->m_ext.ext_buf, m->m_ext.ext_size,
    > +	    m->m_flags & M_EXTWR, m_extfree_refs_fn, mrefs);
    >  
    > -	return (refs);
    > +	return (0);
    >  }
    >  
    >  /*
    > @@ -487,15 +518,13 @@ mextfree_register(void (*fn)(caddr_t, u_
    >  	return num_extfree_fns++;
    >  }
    >  
    > -void
    > +static void
    >  m_extfree(struct mbuf *m)
    >  {
    > -	if (m_extunref(m) == 0) {
    > -		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
    > -		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
    > -		    m->m_ext.ext_size, m->m_ext.ext_arg);
    > -	}
    > -
    > +	KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
    > +	mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
    > +	    m->m_ext.ext_size, m->m_ext.ext_arg);
    > + 
    >  	m->m_flags &= ~(M_EXT|M_EXTWR);
    >  }
    >  
    > @@ -656,9 +685,9 @@ m_copym(struct mbuf *m0, int off, int le
    >  		}
    >  		n->m_len = min(len, m->m_len - off);
    >  		if (m->m_flags & M_EXT) {
    > +			if (m_extref(m, n, wait) != 0)
    > +				goto nospace;
    >  			n->m_data = m->m_data + off;
    > -			n->m_ext = m->m_ext;
    > -			MCLADDREFERENCE(m, n);
    >  		} else {
    >  			n->m_data += m->m_data -
    >  			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
    > @@ -1089,8 +1118,12 @@ m_split(struct mbuf *m0, int len0, int w
    >  			return (NULL);
    >  	}
    >  	if (m->m_flags & M_EXT) {
    > -		n->m_ext = m->m_ext;
    > -		MCLADDREFERENCE(m, n);
    > +		if (m_extref(m, n, wait) != 0) {
    > +			m_freem(n);
    > +			if (m0->m_flags & M_PKTHDR)
    > +				m0->m_pkthdr.len = olen;
    > +			return (NULL);
    > +		}
    >  		n->m_data = m->m_data + len;
    >  	} else {
    >  		m_align(n, remain);
    > @@ -1271,13 +1304,17 @@ m_devget(char *buf, int totlen, int off)
    >  void
    >  m_zero(struct mbuf *m)
    >  {
    > -	if (M_READONLY(m)) {
    > -		mtx_enter(&m_extref_mtx);
    > -		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
    > -			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
    > -			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
    > -		}
    > -		mtx_leave(&m_extref_mtx);
    > +	if (ISSET(m->m_flags, M_EXT) &&
    > +	    m->m_ext.ext_free_fn == m_extfree_refs_fn) {
    > +		struct m_ext_refs *mrefs = m->m_ext.ext_arg;
    > +
    > +		/*
    > +		 * this variable only transitions in one direction,
    > +		 * so if there is a race it will be toward the same
    > +		 * result and therefore there is no loss.
    > +		 */
    > +
    > +		mrefs->zero = 1;
    >  		return;
    >  	}
    >  
    > @@ -1525,9 +1562,7 @@ m_print(void *v,
    >  		    m->m_ext.ext_buf, m->m_ext.ext_size);
    >  		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
    >  		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
    > -		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
    > -		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
    > -
    > +		/* if m_ext.ext_free_fn == m_extfree_refs_fn ? */
    >  	}
    >  }
    >  
    > Index: sys/mbuf.h
    > ===================================================================
    > RCS file: /cvs/src/sys/sys/mbuf.h,v
    > diff -u -p -r1.269 mbuf.h
    > --- sys/mbuf.h	5 Feb 2026 03:26:00 -0000	1.269
    > +++ sys/mbuf.h	22 Apr 2026 06:04:19 -0000
    > @@ -145,8 +145,6 @@ struct mbuf_ext {
    >  	void	*ext_arg;
    >  	u_int	ext_free_fn;		/* index of free function */
    >  	u_int	ext_size;		/* size of buffer, for ext_free_fn */
    > -	struct mbuf *ext_nextref;
    > -	struct mbuf *ext_prevref;
    >  #ifdef DEBUG
    >  	const char *ext_ofile;
    >  	const char *ext_nfile;
    > @@ -282,13 +280,22 @@ struct mbuf {
    >  #define MCLREFDEBUGO(m, file, line)
    >  #endif
    >  
    > -#define	MCLISREFERENCED(m)	((m)->m_ext.ext_nextref != (m))
    > +int	m_ext_refs_shared(struct mbuf *);
    >  
    > -#define	MCLADDREFERENCE(o, n)	m_extref((o), (n))
    > +static inline int
    > +m_extreferenced(struct mbuf *m)
    > +{
    > +	extern u_int m_extfree_refs_fn;
    > +
    > +	if (m->m_ext.ext_free_fn != m_extfree_refs_fn)
    > +		return (0);
    > +
    > +	return (m_ext_refs_shared(m));
    > +}
    > +
    > +#define	MCLISREFERENCED(m)	m_extreferenced(m)
    >  
    >  #define	MCLINITREFERENCE(m)	do {					\
    > -		(m)->m_ext.ext_prevref = (m);				\
    > -		(m)->m_ext.ext_nextref = (m);				\
    >  		MCLREFDEBUGO((m), __FILE__, __LINE__);			\
    >  		MCLREFDEBUGN((m), NULL, 0);				\
    >  	} while (/* CONSTCOND */ 0)
    > @@ -439,7 +446,6 @@ int	m_leadingspace(struct mbuf *);
    >  int	m_trailingspace(struct mbuf *);
    >  void	m_align(struct mbuf *, int);
    >  struct mbuf *m_clget(struct mbuf *, int, u_int);
    > -void	m_extref(struct mbuf *, struct mbuf *);
    >  void	m_pool_init(struct pool *, u_int, u_int, const char *);
    >  u_int	m_pool_used(void);
    >  void	m_extfree_pool(caddr_t, u_int, void *);
    > 
    
    
  • Alexander Bluhm:

    mbuf cluster m_extref_mtx contention