Index | Thread | Search

From:
"Scott Cheloha via tech" <tech@openbsd.org>
Subject:
Re: btrace(8): cache ELF .symtab, .strtab in sorted array
To:
tech@openbsd.org
Cc:
mpi@openbsd.org
Date:
Wed, 13 Mar 2024 19:58:28 -0500

Download raw body.

Thread
On Thu, Feb 29, 2024 at 11:01:11AM -0600, Scott Cheloha wrote:
> On Wed, Feb 28, 2024 at 03:49:54PM +0100, Martin Pieuchot wrote:
> > On 27/02/24(Tue) 21:05, Scott Cheloha wrote:
> > > On Tue, Feb 27, 2024 at 06:22:18PM -0600, Scott Cheloha wrote:
> > > > On Sun, Feb 18, 2024 at 06:07:28PM -0600, Scott Cheloha wrote:
> > > > > When btrace(8) runs a profiling probe, the most expensive thing it
> > > > > does is call kelf_snprintf().  kelf_snprintf() walks through the ELF
> > > > > .symtab to find a function matching the PC and then looks up the
> > > > > function's name in the ELF .strtab.
> > > > > 
> > > > > This patch consolidates the .symtab parsing and searching logic into
> > > > > kelf_open().  We store each function found in the .symtab in an array
> > > > > sorted by st_value alongside its size and name.  Then, at runtime,
> > > > > kelf_snprintf() calls bsearch(3) to match the given PC to a function.
> > > > > 
> > > > > On my laptop this patch reduces the average cost of kelf_snprintf()
> > > > > at idle by about 325x:
> > > > > 
> > > > > # -current
> > > > > $ doas btrace /usr/share/btrace/kprofile.bt >/dev/null
> > > > > btrace: kelf_snprintf: cputime 5.250057903 calls 13082 avg 0.000401319
> > > > > 
> > > > > # Patched
> > > > > $ doas ./obj/btrace /usr/share/btrace/kprofile.bt >/dev/null
> > > > > btrace: kelf_snprintf: cputime 0.017177200 calls 13896 avg 0.000001236
> > > > > 
> > > > > This could be further optimized by deferring symbol lookup until a
> > > > > given stacktrace is printed.  That would require typed maps, though.
> > > > > 
> > > > > One thing is missing: I need help working the "best offset" logic for
> > > > > unsized symbols into this scheme.
> > > > 
> > > > Ping.
> > > > 
> > > > Here's a tidier patch.
> > > > 
> > > > This is a very nice speedup.  The patch dramatically reduces the drop
> > > > rate.
> > > 
> > > (whoops, wrong patch, my bad.  this is what I meant to send.)
> > 
> > ok with two tweaks below.
> > 
> > > [...]
> > > +
> > > +	/*
> > > +	 * All errors after this point are allocation failures.
> > > +	 * From here on, don't jump to "bad", just call err(3).
> > > +	 */
> > 
> > Please drop this comment, paraphrasing the code is not useful.
> 
> Dropped.
> 
> > > [...]
> > > +	syms->table = tmp;
> > > +	debug("%s: %zu symbols %zu functions\n",
> > > +	    path, symtab_size, syms->nsymb);
> > 
> > Please drop this debug.  It is not useful for debugging btrace(8) to
> > know how many symbols the kernel contains.
> 
> Dropped.
> 
> As discussed off-list, I have also added code to handle function
> symbols with an st_size of zero.  We just pretend that they extend
> from their st_value to that of the next function symbol in the address
> space.  This is effectively what we are already doing in kelf_snprintf()
> with the bestidx logic.

Ping.

Index: ksyms.c
===================================================================
RCS file: /cvs/src/usr.sbin/btrace/ksyms.c,v
diff -u -p -r1.8 -r1.7
--- ksyms.c	14 Mar 2024 00:54:54 -0000	1.8
+++ ksyms.c	12 Mar 2024 17:22:24 -0000	1.7
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ksyms.c,v 1.8 2024/03/14 00:54:54 cheloha Exp $ */
+/*	$OpenBSD: ksyms.c,v 1.7 2024/03/12 17:22:24 cheloha Exp $ */
 
 /*
  * Copyright (c) 2016 Martin Pieuchot <mpi@openbsd.org>
@@ -23,6 +23,7 @@
 #include <err.h>
 #include <fcntl.h>
 #include <gelf.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -30,60 +31,149 @@
 
 #include "btrace.h"
 
+struct sym {
+	char *sym_name;
+	unsigned long sym_value;	/* from st_value */
+	unsigned long sym_size;		/* from st_size */
+};
+
 struct syms {
-	int		 fd;
-	Elf		*elf;
-	Elf_Scn		*symtab;
-	size_t		 strtabndx, nsymb;
+	struct sym *table;
+	size_t nsymb;
 };
 
-int		 kelf_parse(struct syms *);
+int sym_compare_search(const void *, const void *);
+int sym_compare_sort(const void *, const void *);
 
 struct syms *
 kelf_open(const char *path)
 {
-	struct syms *syms;
-	int error;
+	char *name;
+	Elf *elf;
+	Elf_Data *data = NULL;
+	Elf_Scn	*scn = NULL, *symtab;
+	GElf_Sym sym;
+	GElf_Shdr shdr;
+	size_t i, shstrndx, strtabndx = SIZE_MAX, symtab_size;
+	unsigned long diff;
+	struct sym *tmp;
+	struct syms *syms = NULL;
+	int fd;
 
 	if (elf_version(EV_CURRENT) == EV_NONE)
 		errx(1, "elf_version: %s", elf_errmsg(-1));
 
-	if ((syms = calloc(1, sizeof(*syms))) == NULL)
-		err(1, NULL);
-
-	syms->fd = open(path, O_RDONLY);
-	if (syms->fd == -1) {
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
 		warn("open: %s", path);
-		free(syms);
 		return NULL;
 	}
 
-	if ((syms->elf = elf_begin(syms->fd, ELF_C_READ, NULL)) == NULL) {
+	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
 		warnx("elf_begin: %s", elf_errmsg(-1));
 		goto bad;
 	}
 
-	if (elf_kind(syms->elf) != ELF_K_ELF)
+	if (elf_kind(elf) != ELF_K_ELF)
 		goto bad;
 
-	error = kelf_parse(syms);
-	if (error)
+	if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
+		warnx("elf_getshdrstrndx: %s", elf_errmsg(-1));
 		goto bad;
+	}
 
-	return syms;
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		if (gelf_getshdr(scn, &shdr) != &shdr) {
+			warnx("elf_getshdr: %s", elf_errmsg(-1));
+			goto bad;
+		}
+		if ((name = elf_strptr(elf, shstrndx, shdr.sh_name)) == NULL) {
+			warnx("elf_strptr: %s", elf_errmsg(-1));
+			goto bad;
+		}
+		if (strcmp(name, ELF_SYMTAB) == 0 &&
+		    shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) {
+			symtab = scn;
+			symtab_size = shdr.sh_size / shdr.sh_entsize;
+		}
+		if (strcmp(name, ELF_STRTAB) == 0 &&
+		    shdr.sh_type == SHT_STRTAB) {
+			strtabndx = elf_ndxscn(scn);
+		}
+	}
+	if (symtab == NULL) {
+		warnx("%s: %s: section not found", path, ELF_SYMTAB);
+		goto bad;
+	}
+	if (strtabndx == SIZE_MAX) {
+		warnx("%s: %s: section not found", path, ELF_STRTAB);
+		goto bad;
+	}
+
+	data = elf_rawdata(symtab, data);
+	if (data == NULL)
+		goto bad;
+
+	if ((syms = calloc(1, sizeof(*syms))) == NULL)
+		err(1, NULL);
+	syms->table = calloc(symtab_size, sizeof *syms->table);
+	if (syms->table == NULL)
+		err(1, NULL);
+	for (i = 0; i < symtab_size; i++) {
+		if (gelf_getsym(data, i, &sym) == NULL)
+			continue;
+		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+			continue;
+		name = elf_strptr(elf, strtabndx, sym.st_name);
+		if (name == NULL)
+			continue;
+		syms->table[syms->nsymb].sym_name = strdup(name);
+		if (syms->table[syms->nsymb].sym_name == NULL)
+			err(1, NULL);
+		syms->table[syms->nsymb].sym_value = sym.st_value;
+		syms->table[syms->nsymb].sym_size = sym.st_size;
+		syms->nsymb++;
+	}
+	tmp = reallocarray(syms->table, syms->nsymb, sizeof *syms->table);
+	if (tmp == NULL)
+		err(1, NULL);
+	syms->table = tmp;
+
+	/* Sort symbols in ascending order by address. */
+	qsort(syms->table, syms->nsymb, sizeof *syms->table, sym_compare_sort);
+
+	/*
+	 * Some functions, particularly those written in assembly, have an
+	 * st_size of zero.  We can approximate a size for these by assuming
+	 * that they extend from their st_value to that of the next function.
+	 */
+	for (i = 0; i < syms->nsymb; i++) {
+		if (syms->table[i].sym_size != 0)
+			continue;
+		/* Can't do anything for the last symbol. */
+		if (i + 1 == syms->nsymb)
+			continue;
+		diff = syms->table[i + 1].sym_value - syms->table[i].sym_value;
+		syms->table[i].sym_size = diff;
+	}
 
 bad:
-	kelf_close(syms);
-	return NULL;
+	elf_end(elf);
+	close(fd);
+	return syms;
 }
 
 void
 kelf_close(struct syms *syms)
 {
+	size_t i;
+
 	if (syms == NULL)
 		return;
-	elf_end(syms->elf);
-	close(syms->fd);
+
+	for (i = 0; i < syms->nsymb; i++)
+		free(syms->table[i].sym_name);
+	free(syms->table);
 	free(syms);
 }
 
@@ -91,106 +181,46 @@ int
 kelf_snprintsym(struct syms *syms, char *str, size_t size, unsigned long pc,
     unsigned long off)
 {
-	GElf_Sym	 sym;
-	Elf_Data	*data = NULL;
-	Elf_Addr	 offset, bestoff = 0;
-	size_t		 i, bestidx = 0;
-	char		*name;
-	int		 cnt;
+	struct sym key = { .sym_value = pc + off };
+	struct sym *entry;
+	Elf_Addr offset;
 
 	if (syms == NULL)
 		goto fallback;
 
-	data = elf_rawdata(syms->symtab, data);
-	if (data == NULL)
+	entry = bsearch(&key, syms->table, syms->nsymb, sizeof *syms->table,
+	    sym_compare_search);
+	if (entry == NULL)
 		goto fallback;
 
-	for (i = 0; i < syms->nsymb; i++) {
-		if (gelf_getsym(data, i, &sym) == NULL)
-			continue;
-		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
-			continue;
-		if (pc >= sym.st_value + off) {
-			if (pc < (sym.st_value + off + sym.st_size))
-				break;
-			/* Workaround for symbols w/o size, usually asm ones. */
-			if (sym.st_size == 0 && sym.st_value + off > bestoff) {
-				bestidx = i;
-				bestoff = sym.st_value + off;
-			}
-		}
-	}
-
-	if (i == syms->nsymb) {
-		if (bestidx == 0 || gelf_getsym(data, bestidx, &sym) == NULL)
-			goto fallback;
-	}
-
-	name = elf_strptr(syms->elf, syms->strtabndx, sym.st_name);
-	if (name != NULL)
-		cnt = snprintf(str, size, "\n%s", name);
-	else
-		cnt = snprintf(str, size, "\n0x%llx", sym.st_value);
-	if (cnt < 0)
-		return cnt;
-
-	offset = pc - (sym.st_value + off);
+	offset = pc - (entry->sym_value + off);
 	if (offset != 0) {
-		int l;
-
-		l = snprintf(str + cnt, size > (size_t)cnt ? size - cnt : 0,
-		    "+0x%llx", (unsigned long long)offset);
-		if (l < 0)
-			return l;
-		cnt += l;
+		return snprintf(str, size, "\n%s+0x%llx",
+		    entry->sym_name, (unsigned long long)offset);
 	}
 
-	return cnt;
+	return snprintf(str, size, "\n%s", entry->sym_name);
 
 fallback:
 	return snprintf(str, size, "\n0x%lx", pc);
 }
 
 int
-kelf_parse(struct syms *syms)
+sym_compare_sort(const void *ap, const void *bp)
 {
-	GElf_Shdr	 shdr;
-	Elf_Scn		*scn, *scnctf;
-	char		*name;
-	size_t		 shstrndx;
-
-	if (elf_getshdrstrndx(syms->elf, &shstrndx) != 0) {
-		warnx("elf_getshdrstrndx: %s", elf_errmsg(-1));
-		return 1;
-	}
-
-	scn = scnctf = NULL;
-	while ((scn = elf_nextscn(syms->elf, scn)) != NULL) {
-		if (gelf_getshdr(scn, &shdr) != &shdr) {
-			warnx("elf_getshdr: %s", elf_errmsg(-1));
-			return 1;
-		}
-
-		if ((name = elf_strptr(syms->elf, shstrndx,
-		    shdr.sh_name)) == NULL) {
-			warnx("elf_strptr: %s", elf_errmsg(-1));
-			return 1;
-		}
-
-		if (strcmp(name, ELF_SYMTAB) == 0 &&
-		    shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) {
-			syms->symtab = scn;
-			syms->nsymb = shdr.sh_size / shdr.sh_entsize;
-		}
+	const struct sym *a = ap, *b = bp;
 
-		if (strcmp(name, ELF_STRTAB) == 0 &&
-		    shdr.sh_type == SHT_STRTAB) {
-			syms->strtabndx = elf_ndxscn(scn);
-		}
-	}
+	if (a->sym_value < b->sym_value)
+		return -1;
+	return a->sym_value > b->sym_value;
+}
 
-	if (syms->symtab == NULL)
-		warnx("symbol table not found");
+int
+sym_compare_search(const void *keyp, const void *entryp)
+{
+	const struct sym *entry = entryp, *key = keyp;
 
-	return 0;
+	if (key->sym_value < entry->sym_value)
+		return -1;
+	return key->sym_value >= entry->sym_value + entry->sym_size;
 }