Download raw body.
btrace(8): cache ELF .symtab, .strtab in sorted array
On 13/03/24(Wed) 19:58, Scott Cheloha via tech wrote:
> On Thu, Feb 29, 2024 at 11:01:11AM -0600, Scott Cheloha wrote:
> > On Wed, Feb 28, 2024 at 03:49:54PM +0100, Martin Pieuchot wrote:
> > > On 27/02/24(Tue) 21:05, Scott Cheloha wrote:
> > > > On Tue, Feb 27, 2024 at 06:22:18PM -0600, Scott Cheloha wrote:
> > > > > On Sun, Feb 18, 2024 at 06:07:28PM -0600, Scott Cheloha wrote:
> > > > > > When btrace(8) runs a profiling probe, the most expensive thing it
> > > > > > does is call kelf_snprintf(). kelf_snprintf() walks through the ELF
> > > > > > .symtab to find a function matching the PC and then looks up the
> > > > > > function's name in the ELF .strtab.
> > > > > >
> > > > > > This patch consolidates the .symtab parsing and searching logic into
> > > > > > kelf_open(). We store each function found in the .symtab in an array
> > > > > > sorted by st_value alongside its size and name. Then, at runtime,
> > > > > > kelf_snprintf() calls bsearch(3) to match the given PC to a function.
> > > > > >
> > > > > > On my laptop this patch reduces the average cost of kelf_snprintf()
> > > > > > at idle by about 325x:
> > > > > >
> > > > > > # -current
> > > > > > $ doas btrace /usr/share/btrace/kprofile.bt >/dev/null
> > > > > > btrace: kelf_snprintf: cputime 5.250057903 calls 13082 avg 0.000401319
> > > > > >
> > > > > > # Patched
> > > > > > $ doas ./obj/btrace /usr/share/btrace/kprofile.bt >/dev/null
> > > > > > btrace: kelf_snprintf: cputime 0.017177200 calls 13896 avg 0.000001236
> > > > > >
> > > > > > This could be further optimized by deferring symbol lookup until a
> > > > > > given stacktrace is printed. That would require typed maps, though.
> > > > > >
> > > > > > One thing is missing: I need help working the "best offset" logic for
> > > > > > unsized symbols into this scheme.
> > > > >
> > > > > Ping.
> > > > >
> > > > > Here's a tidier patch.
> > > > >
> > > > > This is a very nice speedup. The patch dramatically reduces the drop
> > > > > rate.
> > > >
> > > > (whoops, wrong patch, my bad. this is what I meant to send.)
> > >
> > > ok with two tweaks below.
> > >
> > > > [...]
> > > > +
> > > > + /*
> > > > + * All errors after this point are allocation failures.
> > > > + * From here on, don't jump to "bad", just call err(3).
> > > > + */
> > >
> > > Please drop this comment, paraphrasing the code is not useful.
> >
> > Dropped.
> >
> > > > [...]
> > > > + syms->table = tmp;
> > > > + debug("%s: %zu symbols %zu functions\n",
> > > > + path, symtab_size, syms->nsymb);
> > >
> > > Please drop this debug. It is not useful for debugging btrace(8) to
> > > know how many symbols the kernel contains.
> >
> > Dropped.
> >
> > As discussed off-list, I have also added code to handle function
> > symbols with an st_size of zero. We just pretend that they extend
> > from their st_value to that of the next function symbol in the address
> > space. This is effectively what we are already doing in kelf_snprintf()
> > with the bestidx logic.
>
> Ping.
ok mpi@
> Index: ksyms.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/btrace/ksyms.c,v
> diff -u -p -r1.8 -r1.7
> --- ksyms.c 14 Mar 2024 00:54:54 -0000 1.8
> +++ ksyms.c 12 Mar 2024 17:22:24 -0000 1.7
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: ksyms.c,v 1.8 2024/03/14 00:54:54 cheloha Exp $ */
> +/* $OpenBSD: ksyms.c,v 1.7 2024/03/12 17:22:24 cheloha Exp $ */
>
> /*
> * Copyright (c) 2016 Martin Pieuchot <mpi@openbsd.org>
> @@ -23,6 +23,7 @@
> #include <err.h>
> #include <fcntl.h>
> #include <gelf.h>
> +#include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> @@ -30,60 +31,149 @@
>
> #include "btrace.h"
>
> +struct sym {
> + char *sym_name;
> + unsigned long sym_value; /* from st_value */
> + unsigned long sym_size; /* from st_size */
> +};
> +
> struct syms {
> - int fd;
> - Elf *elf;
> - Elf_Scn *symtab;
> - size_t strtabndx, nsymb;
> + struct sym *table;
> + size_t nsymb;
> };
>
> -int kelf_parse(struct syms *);
> +int sym_compare_search(const void *, const void *);
> +int sym_compare_sort(const void *, const void *);
>
> struct syms *
> kelf_open(const char *path)
> {
> - struct syms *syms;
> - int error;
> + char *name;
> + Elf *elf;
> + Elf_Data *data = NULL;
> + Elf_Scn *scn = NULL, *symtab;
> + GElf_Sym sym;
> + GElf_Shdr shdr;
> + size_t i, shstrndx, strtabndx = SIZE_MAX, symtab_size;
> + unsigned long diff;
> + struct sym *tmp;
> + struct syms *syms = NULL;
> + int fd;
>
> if (elf_version(EV_CURRENT) == EV_NONE)
> errx(1, "elf_version: %s", elf_errmsg(-1));
>
> - if ((syms = calloc(1, sizeof(*syms))) == NULL)
> - err(1, NULL);
> -
> - syms->fd = open(path, O_RDONLY);
> - if (syms->fd == -1) {
> + fd = open(path, O_RDONLY);
> + if (fd == -1) {
> warn("open: %s", path);
> - free(syms);
> return NULL;
> }
>
> - if ((syms->elf = elf_begin(syms->fd, ELF_C_READ, NULL)) == NULL) {
> + if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
> warnx("elf_begin: %s", elf_errmsg(-1));
> goto bad;
> }
>
> - if (elf_kind(syms->elf) != ELF_K_ELF)
> + if (elf_kind(elf) != ELF_K_ELF)
> goto bad;
>
> - error = kelf_parse(syms);
> - if (error)
> + if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
> + warnx("elf_getshdrstrndx: %s", elf_errmsg(-1));
> goto bad;
> + }
>
> - return syms;
> + while ((scn = elf_nextscn(elf, scn)) != NULL) {
> + if (gelf_getshdr(scn, &shdr) != &shdr) {
> + warnx("elf_getshdr: %s", elf_errmsg(-1));
> + goto bad;
> + }
> + if ((name = elf_strptr(elf, shstrndx, shdr.sh_name)) == NULL) {
> + warnx("elf_strptr: %s", elf_errmsg(-1));
> + goto bad;
> + }
> + if (strcmp(name, ELF_SYMTAB) == 0 &&
> + shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) {
> + symtab = scn;
> + symtab_size = shdr.sh_size / shdr.sh_entsize;
> + }
> + if (strcmp(name, ELF_STRTAB) == 0 &&
> + shdr.sh_type == SHT_STRTAB) {
> + strtabndx = elf_ndxscn(scn);
> + }
> + }
> + if (symtab == NULL) {
> + warnx("%s: %s: section not found", path, ELF_SYMTAB);
> + goto bad;
> + }
> + if (strtabndx == SIZE_MAX) {
> + warnx("%s: %s: section not found", path, ELF_STRTAB);
> + goto bad;
> + }
> +
> + data = elf_rawdata(symtab, data);
> + if (data == NULL)
> + goto bad;
> +
> + if ((syms = calloc(1, sizeof(*syms))) == NULL)
> + err(1, NULL);
> + syms->table = calloc(symtab_size, sizeof *syms->table);
> + if (syms->table == NULL)
> + err(1, NULL);
> + for (i = 0; i < symtab_size; i++) {
> + if (gelf_getsym(data, i, &sym) == NULL)
> + continue;
> + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
> + continue;
> + name = elf_strptr(elf, strtabndx, sym.st_name);
> + if (name == NULL)
> + continue;
> + syms->table[syms->nsymb].sym_name = strdup(name);
> + if (syms->table[syms->nsymb].sym_name == NULL)
> + err(1, NULL);
> + syms->table[syms->nsymb].sym_value = sym.st_value;
> + syms->table[syms->nsymb].sym_size = sym.st_size;
> + syms->nsymb++;
> + }
> + tmp = reallocarray(syms->table, syms->nsymb, sizeof *syms->table);
> + if (tmp == NULL)
> + err(1, NULL);
> + syms->table = tmp;
> +
> + /* Sort symbols in ascending order by address. */
> + qsort(syms->table, syms->nsymb, sizeof *syms->table, sym_compare_sort);
> +
> + /*
> + * Some functions, particularly those written in assembly, have an
> + * st_size of zero. We can approximate a size for these by assuming
> + * that they extend from their st_value to that of the next function.
> + */
> + for (i = 0; i < syms->nsymb; i++) {
> + if (syms->table[i].sym_size != 0)
> + continue;
> + /* Can't do anything for the last symbol. */
> + if (i + 1 == syms->nsymb)
> + continue;
> + diff = syms->table[i + 1].sym_value - syms->table[i].sym_value;
> + syms->table[i].sym_size = diff;
> + }
>
> bad:
> - kelf_close(syms);
> - return NULL;
> + elf_end(elf);
> + close(fd);
> + return syms;
> }
>
> void
> kelf_close(struct syms *syms)
> {
> + size_t i;
> +
> if (syms == NULL)
> return;
> - elf_end(syms->elf);
> - close(syms->fd);
> +
> + for (i = 0; i < syms->nsymb; i++)
> + free(syms->table[i].sym_name);
> + free(syms->table);
> free(syms);
> }
>
> @@ -91,106 +181,46 @@ int
> kelf_snprintsym(struct syms *syms, char *str, size_t size, unsigned long pc,
> unsigned long off)
> {
> - GElf_Sym sym;
> - Elf_Data *data = NULL;
> - Elf_Addr offset, bestoff = 0;
> - size_t i, bestidx = 0;
> - char *name;
> - int cnt;
> + struct sym key = { .sym_value = pc + off };
> + struct sym *entry;
> + Elf_Addr offset;
>
> if (syms == NULL)
> goto fallback;
>
> - data = elf_rawdata(syms->symtab, data);
> - if (data == NULL)
> + entry = bsearch(&key, syms->table, syms->nsymb, sizeof *syms->table,
> + sym_compare_search);
> + if (entry == NULL)
> goto fallback;
>
> - for (i = 0; i < syms->nsymb; i++) {
> - if (gelf_getsym(data, i, &sym) == NULL)
> - continue;
> - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
> - continue;
> - if (pc >= sym.st_value + off) {
> - if (pc < (sym.st_value + off + sym.st_size))
> - break;
> - /* Workaround for symbols w/o size, usually asm ones. */
> - if (sym.st_size == 0 && sym.st_value + off > bestoff) {
> - bestidx = i;
> - bestoff = sym.st_value + off;
> - }
> - }
> - }
> -
> - if (i == syms->nsymb) {
> - if (bestidx == 0 || gelf_getsym(data, bestidx, &sym) == NULL)
> - goto fallback;
> - }
> -
> - name = elf_strptr(syms->elf, syms->strtabndx, sym.st_name);
> - if (name != NULL)
> - cnt = snprintf(str, size, "\n%s", name);
> - else
> - cnt = snprintf(str, size, "\n0x%llx", sym.st_value);
> - if (cnt < 0)
> - return cnt;
> -
> - offset = pc - (sym.st_value + off);
> + offset = pc - (entry->sym_value + off);
> if (offset != 0) {
> - int l;
> -
> - l = snprintf(str + cnt, size > (size_t)cnt ? size - cnt : 0,
> - "+0x%llx", (unsigned long long)offset);
> - if (l < 0)
> - return l;
> - cnt += l;
> + return snprintf(str, size, "\n%s+0x%llx",
> + entry->sym_name, (unsigned long long)offset);
> }
>
> - return cnt;
> + return snprintf(str, size, "\n%s", entry->sym_name);
>
> fallback:
> return snprintf(str, size, "\n0x%lx", pc);
> }
>
> int
> -kelf_parse(struct syms *syms)
> +sym_compare_sort(const void *ap, const void *bp)
> {
> - GElf_Shdr shdr;
> - Elf_Scn *scn, *scnctf;
> - char *name;
> - size_t shstrndx;
> -
> - if (elf_getshdrstrndx(syms->elf, &shstrndx) != 0) {
> - warnx("elf_getshdrstrndx: %s", elf_errmsg(-1));
> - return 1;
> - }
> -
> - scn = scnctf = NULL;
> - while ((scn = elf_nextscn(syms->elf, scn)) != NULL) {
> - if (gelf_getshdr(scn, &shdr) != &shdr) {
> - warnx("elf_getshdr: %s", elf_errmsg(-1));
> - return 1;
> - }
> -
> - if ((name = elf_strptr(syms->elf, shstrndx,
> - shdr.sh_name)) == NULL) {
> - warnx("elf_strptr: %s", elf_errmsg(-1));
> - return 1;
> - }
> -
> - if (strcmp(name, ELF_SYMTAB) == 0 &&
> - shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) {
> - syms->symtab = scn;
> - syms->nsymb = shdr.sh_size / shdr.sh_entsize;
> - }
> + const struct sym *a = ap, *b = bp;
>
> - if (strcmp(name, ELF_STRTAB) == 0 &&
> - shdr.sh_type == SHT_STRTAB) {
> - syms->strtabndx = elf_ndxscn(scn);
> - }
> - }
> + if (a->sym_value < b->sym_value)
> + return -1;
> + return a->sym_value > b->sym_value;
> +}
>
> - if (syms->symtab == NULL)
> - warnx("symbol table not found");
> +int
> +sym_compare_search(const void *keyp, const void *entryp)
> +{
> + const struct sym *entry = entryp, *key = keyp;
>
> - return 0;
> + if (key->sym_value < entry->sym_value)
> + return -1;
> + return key->sym_value >= entry->sym_value + entry->sym_size;
> }
>
btrace(8): cache ELF .symtab, .strtab in sorted array