Index | Thread | Search

From:
Marc Espie <marc.espie.openbsd@gmail.com>
Subject:
make: caching regexps or not ?
To:
tech@openbsd.org
Date:
Fri, 14 Jun 2024 14:51:53 +0200

Download raw body.

Thread
  • Marc Espie:

    make: caching regexps or not ?

The following patch has make indexing :C regexps it sees and compiling them
just once. I was hoping to get a significant speed-up, but at least
for the regexp expensive go module in ports, it doesn't since to change
significant timings.

Maybe it's better elsewhere ?

I think it's fairly straightforward and *could* speed up complex compilations.

For what it's worth, bsd.port.mk, in sysutils/telegraf, encounter >3500 regexps
while expanding the loop, but they're mostly duplicates. I end up with 19 different
regexps !

Index: varmodifiers.c
===================================================================
RCS file: /build/data/openbsd/cvs/src/usr.bin/make/varmodifiers.c,v
diff -u -p -r1.49 varmodifiers.c
--- varmodifiers.c	4 Sep 2023 11:35:11 -0000	1.49
+++ varmodifiers.c	14 Jun 2024 09:13:39 -0000
@@ -73,6 +73,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ohash.h>
 #include "defines.h"
 #include "buf.h"
 #include "var.h"
@@ -127,8 +128,19 @@ typedef struct {
 	regmatch_t	 *matches;
 	char	 *replace;
 	int 	  flags;
+	int 	  error;
+	char	  source[1];
 } VarREPattern;
 
+static struct ohash cache;	/* hash table of regexps */
+struct ohash_info cache_info = {
+	offsetof(VarREPattern, source), NULL, hash_calloc, hash_free, 
+	element_alloc
+};
+
+static VarREPattern *string_to_pattern(const char *);
+static VarREPattern *create_pattern(const char *, const char *);
+
 static bool VarSubstitute(struct Name *, bool, Buffer, void *);
 static char *VarGetPattern(SymTable *, int, const char **, int, int,
     size_t *, VarPattern *);
@@ -188,6 +200,7 @@ VarModifiers_Init()
 	choose_mod['U'] = &upper_mod;
 	choose_mod['L'] = &lower_mod;
 	choose_mod['s'] = &shcmd_mod;
+	ohash_init(&cache, 10, &cache_info);
 }
 
 /* All modifiers handle addSpace (need to add a space before placing the
@@ -1042,30 +1055,60 @@ free_patternarg(void *p)
 	free(vp);
 }
 
+static VarREPattern *
+create_pattern(const char *source, const char *esource)
+{
+	VarREPattern *p;
+
+	p = ohash_create_entry(&cache_info, source, &esource);
+	p->error = regcomp(&(p->re), source, REG_EXTENDED);
+	if (!p->error) {
+		p->nsub = p->re.re_nsub + 1;
+		if (p->nsub < 1)
+			p->nsub = 1;
+		if (p->nsub > 10)
+			p->nsub = 10;
+		p->matches = ereallocarray(NULL, p->nsub, sizeof(regmatch_t));
+	}
+	return p;
+}
+
+static VarREPattern *
+string_to_pattern(const char *source)
+{
+	uint32_t hv;
+	unsigned int slot;
+	VarREPattern *result;
+	const char *esource = NULL;
+	
+	hv = ohash_interval(source, &esource);
+
+	slot = ohash_lookup_interval(&cache, source, esource, hv);
+
+	result = ohash_find(&cache, slot);
+	if (result == NULL) {
+		result = create_pattern(source, esource);
+		if (result != NULL)
+			ohash_insert(&cache, slot, result);
+	}
+	return result;
+}
+
 static char *
 do_regex(const char *s, const struct Name *n UNUSED, void *arg)
 {
-	VarREPattern p2;
 	VarPattern *p = arg;
-	int error;
 	char *result;
+	VarREPattern *p2;
 
-	error = regcomp(&p2.re, p->lhs, REG_EXTENDED);
-	if (error) {
-		VarREError(error, &p2.re, "RE substitution error");
+	p2 = string_to_pattern(p->lhs);
+	if (p2->error) {
+		VarREError(p2->error, &p2->re, "RE substitution error");
 		return var_Error;
 	}
-	p2.nsub = p2.re.re_nsub + 1;
-	p2.replace = p->rhs;
-	p2.flags = p->flags;
-	if (p2.nsub < 1)
-		p2.nsub = 1;
-	if (p2.nsub > 10)
-		p2.nsub = 10;
-	p2.matches = ereallocarray(NULL, p2.nsub, sizeof(regmatch_t));
-	result = VarModify((char *)s, VarRESubstitute, &p2);
-	regfree(&p2.re);
-	free(p2.matches);
+	p2->flags = p->flags;
+	p2->replace = p->rhs;
+	result = VarModify((char *)s, VarRESubstitute, p2);
 	return result;
 }