From: Martin Pieuchot Subject: Re: btrace(8): cache ELF .symtab, .strtab in sorted array To: Scott Cheloha via tech Date: Thu, 14 Mar 2024 11:37:55 +0100 On 13/03/24(Wed) 19:58, Scott Cheloha via tech wrote: > On Thu, Feb 29, 2024 at 11:01:11AM -0600, Scott Cheloha wrote: > > On Wed, Feb 28, 2024 at 03:49:54PM +0100, Martin Pieuchot wrote: > > > On 27/02/24(Tue) 21:05, Scott Cheloha wrote: > > > > On Tue, Feb 27, 2024 at 06:22:18PM -0600, Scott Cheloha wrote: > > > > > On Sun, Feb 18, 2024 at 06:07:28PM -0600, Scott Cheloha wrote: > > > > > > When btrace(8) runs a profiling probe, the most expensive thing it > > > > > > does is call kelf_snprintf(). kelf_snprintf() walks through the ELF > > > > > > .symtab to find a function matching the PC and then looks up the > > > > > > function's name in the ELF .strtab. > > > > > > > > > > > > This patch consolidates the .symtab parsing and searching logic into > > > > > > kelf_open(). We store each function found in the .symtab in an array > > > > > > sorted by st_value alongside its size and name. Then, at runtime, > > > > > > kelf_snprintf() calls bsearch(3) to match the given PC to a function. > > > > > > > > > > > > On my laptop this patch reduces the average cost of kelf_snprintf() > > > > > > at idle by about 325x: > > > > > > > > > > > > # -current > > > > > > $ doas btrace /usr/share/btrace/kprofile.bt >/dev/null > > > > > > btrace: kelf_snprintf: cputime 5.250057903 calls 13082 avg 0.000401319 > > > > > > > > > > > > # Patched > > > > > > $ doas ./obj/btrace /usr/share/btrace/kprofile.bt >/dev/null > > > > > > btrace: kelf_snprintf: cputime 0.017177200 calls 13896 avg 0.000001236 > > > > > > > > > > > > This could be further optimized by deferring symbol lookup until a > > > > > > given stacktrace is printed. That would require typed maps, though. > > > > > > > > > > > > One thing is missing: I need help working the "best offset" logic for > > > > > > unsized symbols into this scheme. > > > > > > > > > > Ping. > > > > > > > > > > Here's a tidier patch. > > > > > > > > > > This is a very nice speedup. The patch dramatically reduces the drop > > > > > rate. > > > > > > > > (whoops, wrong patch, my bad. this is what I meant to send.) > > > > > > ok with two tweaks below. > > > > > > > [...] > > > > + > > > > + /* > > > > + * All errors after this point are allocation failures. > > > > + * From here on, don't jump to "bad", just call err(3). > > > > + */ > > > > > > Please drop this comment, paraphrasing the code is not useful. > > > > Dropped. > > > > > > [...] > > > > + syms->table = tmp; > > > > + debug("%s: %zu symbols %zu functions\n", > > > > + path, symtab_size, syms->nsymb); > > > > > > Please drop this debug. It is not useful for debugging btrace(8) to > > > know how many symbols the kernel contains. > > > > Dropped. > > > > As discussed off-list, I have also added code to handle function > > symbols with an st_size of zero. We just pretend that they extend > > from their st_value to that of the next function symbol in the address > > space. This is effectively what we are already doing in kelf_snprintf() > > with the bestidx logic. > > Ping. ok mpi@ > Index: ksyms.c > =================================================================== > RCS file: /cvs/src/usr.sbin/btrace/ksyms.c,v > diff -u -p -r1.8 -r1.7 > --- ksyms.c 14 Mar 2024 00:54:54 -0000 1.8 > +++ ksyms.c 12 Mar 2024 17:22:24 -0000 1.7 > @@ -1,4 +1,4 @@ > -/* $OpenBSD: ksyms.c,v 1.8 2024/03/14 00:54:54 cheloha Exp $ */ > +/* $OpenBSD: ksyms.c,v 1.7 2024/03/12 17:22:24 cheloha Exp $ */ > > /* > * Copyright (c) 2016 Martin Pieuchot > @@ -23,6 +23,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -30,60 +31,149 @@ > > #include "btrace.h" > > +struct sym { > + char *sym_name; > + unsigned long sym_value; /* from st_value */ > + unsigned long sym_size; /* from st_size */ > +}; > + > struct syms { > - int fd; > - Elf *elf; > - Elf_Scn *symtab; > - size_t strtabndx, nsymb; > + struct sym *table; > + size_t nsymb; > }; > > -int kelf_parse(struct syms *); > +int sym_compare_search(const void *, const void *); > +int sym_compare_sort(const void *, const void *); > > struct syms * > kelf_open(const char *path) > { > - struct syms *syms; > - int error; > + char *name; > + Elf *elf; > + Elf_Data *data = NULL; > + Elf_Scn *scn = NULL, *symtab; > + GElf_Sym sym; > + GElf_Shdr shdr; > + size_t i, shstrndx, strtabndx = SIZE_MAX, symtab_size; > + unsigned long diff; > + struct sym *tmp; > + struct syms *syms = NULL; > + int fd; > > if (elf_version(EV_CURRENT) == EV_NONE) > errx(1, "elf_version: %s", elf_errmsg(-1)); > > - if ((syms = calloc(1, sizeof(*syms))) == NULL) > - err(1, NULL); > - > - syms->fd = open(path, O_RDONLY); > - if (syms->fd == -1) { > + fd = open(path, O_RDONLY); > + if (fd == -1) { > warn("open: %s", path); > - free(syms); > return NULL; > } > > - if ((syms->elf = elf_begin(syms->fd, ELF_C_READ, NULL)) == NULL) { > + if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) { > warnx("elf_begin: %s", elf_errmsg(-1)); > goto bad; > } > > - if (elf_kind(syms->elf) != ELF_K_ELF) > + if (elf_kind(elf) != ELF_K_ELF) > goto bad; > > - error = kelf_parse(syms); > - if (error) > + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { > + warnx("elf_getshdrstrndx: %s", elf_errmsg(-1)); > goto bad; > + } > > - return syms; > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > + if (gelf_getshdr(scn, &shdr) != &shdr) { > + warnx("elf_getshdr: %s", elf_errmsg(-1)); > + goto bad; > + } > + if ((name = elf_strptr(elf, shstrndx, shdr.sh_name)) == NULL) { > + warnx("elf_strptr: %s", elf_errmsg(-1)); > + goto bad; > + } > + if (strcmp(name, ELF_SYMTAB) == 0 && > + shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) { > + symtab = scn; > + symtab_size = shdr.sh_size / shdr.sh_entsize; > + } > + if (strcmp(name, ELF_STRTAB) == 0 && > + shdr.sh_type == SHT_STRTAB) { > + strtabndx = elf_ndxscn(scn); > + } > + } > + if (symtab == NULL) { > + warnx("%s: %s: section not found", path, ELF_SYMTAB); > + goto bad; > + } > + if (strtabndx == SIZE_MAX) { > + warnx("%s: %s: section not found", path, ELF_STRTAB); > + goto bad; > + } > + > + data = elf_rawdata(symtab, data); > + if (data == NULL) > + goto bad; > + > + if ((syms = calloc(1, sizeof(*syms))) == NULL) > + err(1, NULL); > + syms->table = calloc(symtab_size, sizeof *syms->table); > + if (syms->table == NULL) > + err(1, NULL); > + for (i = 0; i < symtab_size; i++) { > + if (gelf_getsym(data, i, &sym) == NULL) > + continue; > + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) > + continue; > + name = elf_strptr(elf, strtabndx, sym.st_name); > + if (name == NULL) > + continue; > + syms->table[syms->nsymb].sym_name = strdup(name); > + if (syms->table[syms->nsymb].sym_name == NULL) > + err(1, NULL); > + syms->table[syms->nsymb].sym_value = sym.st_value; > + syms->table[syms->nsymb].sym_size = sym.st_size; > + syms->nsymb++; > + } > + tmp = reallocarray(syms->table, syms->nsymb, sizeof *syms->table); > + if (tmp == NULL) > + err(1, NULL); > + syms->table = tmp; > + > + /* Sort symbols in ascending order by address. */ > + qsort(syms->table, syms->nsymb, sizeof *syms->table, sym_compare_sort); > + > + /* > + * Some functions, particularly those written in assembly, have an > + * st_size of zero. We can approximate a size for these by assuming > + * that they extend from their st_value to that of the next function. > + */ > + for (i = 0; i < syms->nsymb; i++) { > + if (syms->table[i].sym_size != 0) > + continue; > + /* Can't do anything for the last symbol. */ > + if (i + 1 == syms->nsymb) > + continue; > + diff = syms->table[i + 1].sym_value - syms->table[i].sym_value; > + syms->table[i].sym_size = diff; > + } > > bad: > - kelf_close(syms); > - return NULL; > + elf_end(elf); > + close(fd); > + return syms; > } > > void > kelf_close(struct syms *syms) > { > + size_t i; > + > if (syms == NULL) > return; > - elf_end(syms->elf); > - close(syms->fd); > + > + for (i = 0; i < syms->nsymb; i++) > + free(syms->table[i].sym_name); > + free(syms->table); > free(syms); > } > > @@ -91,106 +181,46 @@ int > kelf_snprintsym(struct syms *syms, char *str, size_t size, unsigned long pc, > unsigned long off) > { > - GElf_Sym sym; > - Elf_Data *data = NULL; > - Elf_Addr offset, bestoff = 0; > - size_t i, bestidx = 0; > - char *name; > - int cnt; > + struct sym key = { .sym_value = pc + off }; > + struct sym *entry; > + Elf_Addr offset; > > if (syms == NULL) > goto fallback; > > - data = elf_rawdata(syms->symtab, data); > - if (data == NULL) > + entry = bsearch(&key, syms->table, syms->nsymb, sizeof *syms->table, > + sym_compare_search); > + if (entry == NULL) > goto fallback; > > - for (i = 0; i < syms->nsymb; i++) { > - if (gelf_getsym(data, i, &sym) == NULL) > - continue; > - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) > - continue; > - if (pc >= sym.st_value + off) { > - if (pc < (sym.st_value + off + sym.st_size)) > - break; > - /* Workaround for symbols w/o size, usually asm ones. */ > - if (sym.st_size == 0 && sym.st_value + off > bestoff) { > - bestidx = i; > - bestoff = sym.st_value + off; > - } > - } > - } > - > - if (i == syms->nsymb) { > - if (bestidx == 0 || gelf_getsym(data, bestidx, &sym) == NULL) > - goto fallback; > - } > - > - name = elf_strptr(syms->elf, syms->strtabndx, sym.st_name); > - if (name != NULL) > - cnt = snprintf(str, size, "\n%s", name); > - else > - cnt = snprintf(str, size, "\n0x%llx", sym.st_value); > - if (cnt < 0) > - return cnt; > - > - offset = pc - (sym.st_value + off); > + offset = pc - (entry->sym_value + off); > if (offset != 0) { > - int l; > - > - l = snprintf(str + cnt, size > (size_t)cnt ? size - cnt : 0, > - "+0x%llx", (unsigned long long)offset); > - if (l < 0) > - return l; > - cnt += l; > + return snprintf(str, size, "\n%s+0x%llx", > + entry->sym_name, (unsigned long long)offset); > } > > - return cnt; > + return snprintf(str, size, "\n%s", entry->sym_name); > > fallback: > return snprintf(str, size, "\n0x%lx", pc); > } > > int > -kelf_parse(struct syms *syms) > +sym_compare_sort(const void *ap, const void *bp) > { > - GElf_Shdr shdr; > - Elf_Scn *scn, *scnctf; > - char *name; > - size_t shstrndx; > - > - if (elf_getshdrstrndx(syms->elf, &shstrndx) != 0) { > - warnx("elf_getshdrstrndx: %s", elf_errmsg(-1)); > - return 1; > - } > - > - scn = scnctf = NULL; > - while ((scn = elf_nextscn(syms->elf, scn)) != NULL) { > - if (gelf_getshdr(scn, &shdr) != &shdr) { > - warnx("elf_getshdr: %s", elf_errmsg(-1)); > - return 1; > - } > - > - if ((name = elf_strptr(syms->elf, shstrndx, > - shdr.sh_name)) == NULL) { > - warnx("elf_strptr: %s", elf_errmsg(-1)); > - return 1; > - } > - > - if (strcmp(name, ELF_SYMTAB) == 0 && > - shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) { > - syms->symtab = scn; > - syms->nsymb = shdr.sh_size / shdr.sh_entsize; > - } > + const struct sym *a = ap, *b = bp; > > - if (strcmp(name, ELF_STRTAB) == 0 && > - shdr.sh_type == SHT_STRTAB) { > - syms->strtabndx = elf_ndxscn(scn); > - } > - } > + if (a->sym_value < b->sym_value) > + return -1; > + return a->sym_value > b->sym_value; > +} > > - if (syms->symtab == NULL) > - warnx("symbol table not found"); > +int > +sym_compare_search(const void *keyp, const void *entryp) > +{ > + const struct sym *entry = entryp, *key = keyp; > > - return 0; > + if (key->sym_value < entry->sym_value) > + return -1; > + return key->sym_value >= entry->sym_value + entry->sym_size; > } >