Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: mbuf cluster m_extref_mtx contention
To:
David Gwynne <david@gwynne.id.au>
Cc:
tech@openbsd.org
Date:
Mon, 30 Mar 2026 13:56:38 +0200

Download raw body.

Thread
On Tue, Mar 17, 2026 at 02:24:01PM +0100, Alexander Bluhm wrote:
> On Wed, Feb 11, 2026 at 12:26:15PM +1000, David Gwynne wrote:
> > this is the proxy ref thing if you want to give it a go. the code is a
> > lot simpler at least.
> 
> I have tested both variants of the diff again and with this one I
> got a panic.  It was the sixth repetition of my test runs, this
> time with btrace profiling turned on.

It is reproducible and it is triggered by btrace.

*cpu1: uvm_fault(0xffffffff82bb0cb8, 0x2414c5fe0, 0, 1) -> e

*58677  355342      0      0  7     0x14200                softnet0

ddb{1}> trace
x86_ipi_db(ffff80005a5eaff0) at x86_ipi_db+0x16
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x27
x86_bus_space_io_write_1(3f8,0,20) at x86_bus_space_io_write_1+0x1d
comcnputc(800,20) at comcnputc+0xdc
cnputc(20) at cnputc+0x47
db_putchar(70) at db_putchar+0x42a
kprintf() at kprintf+0x12ff
db_printf(ffffffff82662513) at db_printf+0x6d
db_ktrap(6,0,ffff8000632447a0) at db_ktrap+0x205
kerntrap(ffff8000632447a0) at kerntrap+0xe2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
refcnt_rele(fffffd8f371b6d50) at refcnt_rele+0x78
m_extfree_refs(fffffd8003fc7540,840,fffffd8f371b6d40) at m_extfree_refs+0x31
m_free(fffffd808accec00) at m_free+0x170
m_freem(fffffd8089550e00) at m_freem+0x38
ip6_forward(fffffd8089d77900,0,2) at ip6_forward+0x9bf
pf_refragment6(ffff800063244c00,fffffd8f080a7010,0,0,0) at pf_refragment6+0x19a
pf_test(18,3,ffff800000393048,ffff800063244e48) at pf_test+0x83f
ip6_forward(fffffd8089550e00,ffffffff82b77608,2) at ip6_forward+0x3ce
ip6_input_if(ffff800063244fd8,ffff800063244fe4,29,0,ffff800000279048,ffffffff82b775d8) at ip6_input_if+0x87d
ipv6_input(ffff800000279048,fffffd808accec00,ffffffff82b775d8) at ipv6_input+0x42
if_input_process(ffff800000279048,ffff800063245098,0) at if_input_process+0x20a
ifiq_process(ffff800000279480) at ifiq_process+0xa1
taskq_thread(ffff80000002c000) at taskq_thread+0x129
end trace frame: 0x0, count: -25

ddb{1}> x/i refcnt_rele+0x78
refcnt_rele+0x78:       movq    0(%rsi,%rcx,8),%rsi

/home/bluhm/openbsd/cvs/src/sys/kern/kern_synch.c:933
    1cc3:       48 63 4f 04             movslq 0x4(%rdi),%rcx
    1cc7:       48 85 c9                test   %rcx,%rcx
    1cca:       7e c7                   jle    1c93 <refcnt_rele+0x33>
    1ccc:       48 8b 35 00 00 00 00    mov    0(%rip),%rsi        # 1cd3 <refcnt_rele+0x73>
    1cd3:       48 85 f6                test   %rsi,%rsi
    1cd6:       74 bb                   je     1c93 <refcnt_rele+0x33>
    1cd8:       48 8b 34 ce             mov    (%rsi,%rcx,8),%rsi
*   1cdc:       83 7e 2c 00             cmpl   $0x0,0x2c(%rsi)
    1ce0:       74 b1                   je     1c93 <refcnt_rele+0x33>
    1ce2:       48 8b 7e 10             mov    0x10(%rsi),%rdi
    1ce6:       4c 8b 5f 18             mov    0x18(%rdi),%r11
    1cea:       89 c1                   mov    %eax,%ecx
    1cec:       41 b8 ff ff ff ff       mov    $0xffffffff,%r8d
    1cf2:       41 89 c6                mov    %eax,%r14d
    1cf5:       31 c0                   xor    %eax,%eax
    1cf7:       e8 00 00 00 00          callq  1cfc <refcnt_rele+0x9c>
    1cfc:       48 c7 44 24 f8 00 00    movq   $0x0,0xfffffffffffffff8(%rsp)
    1d03:       00 00
    1d05:       44 89 f0                mov    %r14d,%eax
    1d08:       eb 89                   jmp    1c93 <refcnt_rele+0x33>
/home/bluhm/openbsd/cvs/src/sys/kern/kern_synch.c:932

   925  int
   926  refcnt_rele(struct refcnt *r)
   927  {
   928          u_int refs;
   929
   930          membar_exit_before_atomic();
   931          refs = atomic_dec_int_nv(&r->r_refs);
   932          KASSERT(refs != ~0);
*  933          TRACEINDEX(refcnt, r->r_traceidx, r, refs + 1, -1);
   934          if (refs == 0) {
   935                  membar_enter_after_atomic();
   936                  return (1);
   937          }
   938          return (0);
   939  }

   330  #define DT_INDEX_ENTER(func, index, args...) do {
   331          extern struct dt_probe **_DT_INDEX_P(func);
   332
   333          if (__predict_false(dt_tracing) &&
   334              __predict_false(index > 0) &&
   335              __predict_true(_DT_INDEX_P(func) != NULL)) {
*  336                  struct dt_probe *dtp = _DT_INDEX_P(func)[index];
   337
   338                  if(__predict_false(dtp->dtp_recording)) {
   339                          struct dt_provider *dtpv = dtp->dtp_prov;
   340
   341                          dtpv->dtpv_enter(dtpv, dtp, args);
   342                  }
   343          }
   344  } while (0)

r->r_traceidx is invalid so it crashed when dt(4) uses it.

I think this is not a btrace problem, but a use-after-free.  btrace
just makes it visible.  When refcnt_rele() is run, refcnt r must
point to valid memory.

I still prefer the other refcounting diff.  It works fine for me
and is maybe a bit faster.

bluhm