Download raw body.
make: caching regexps or not ?
The following patch has make indexing :C regexps it sees and compiling them
just once. I was hoping to get a significant speed-up, but at least
for the regexp expensive go module in ports, it doesn't since to change
significant timings.
Maybe it's better elsewhere ?
I think it's fairly straightforward and *could* speed up complex compilations.
For what it's worth, bsd.port.mk, in sysutils/telegraf, encounter >3500 regexps
while expanding the loop, but they're mostly duplicates. I end up with 19 different
regexps !
Index: varmodifiers.c
===================================================================
RCS file: /build/data/openbsd/cvs/src/usr.bin/make/varmodifiers.c,v
diff -u -p -r1.49 varmodifiers.c
--- varmodifiers.c 4 Sep 2023 11:35:11 -0000 1.49
+++ varmodifiers.c 14 Jun 2024 09:13:39 -0000
@@ -73,6 +73,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <ohash.h>
#include "defines.h"
#include "buf.h"
#include "var.h"
@@ -127,8 +128,19 @@ typedef struct {
regmatch_t *matches;
char *replace;
int flags;
+ int error;
+ char source[1];
} VarREPattern;
+static struct ohash cache; /* hash table of regexps */
+struct ohash_info cache_info = {
+ offsetof(VarREPattern, source), NULL, hash_calloc, hash_free,
+ element_alloc
+};
+
+static VarREPattern *string_to_pattern(const char *);
+static VarREPattern *create_pattern(const char *, const char *);
+
static bool VarSubstitute(struct Name *, bool, Buffer, void *);
static char *VarGetPattern(SymTable *, int, const char **, int, int,
size_t *, VarPattern *);
@@ -188,6 +200,7 @@ VarModifiers_Init()
choose_mod['U'] = &upper_mod;
choose_mod['L'] = &lower_mod;
choose_mod['s'] = &shcmd_mod;
+ ohash_init(&cache, 10, &cache_info);
}
/* All modifiers handle addSpace (need to add a space before placing the
@@ -1042,30 +1055,60 @@ free_patternarg(void *p)
free(vp);
}
+static VarREPattern *
+create_pattern(const char *source, const char *esource)
+{
+ VarREPattern *p;
+
+ p = ohash_create_entry(&cache_info, source, &esource);
+ p->error = regcomp(&(p->re), source, REG_EXTENDED);
+ if (!p->error) {
+ p->nsub = p->re.re_nsub + 1;
+ if (p->nsub < 1)
+ p->nsub = 1;
+ if (p->nsub > 10)
+ p->nsub = 10;
+ p->matches = ereallocarray(NULL, p->nsub, sizeof(regmatch_t));
+ }
+ return p;
+}
+
+static VarREPattern *
+string_to_pattern(const char *source)
+{
+ uint32_t hv;
+ unsigned int slot;
+ VarREPattern *result;
+ const char *esource = NULL;
+
+ hv = ohash_interval(source, &esource);
+
+ slot = ohash_lookup_interval(&cache, source, esource, hv);
+
+ result = ohash_find(&cache, slot);
+ if (result == NULL) {
+ result = create_pattern(source, esource);
+ if (result != NULL)
+ ohash_insert(&cache, slot, result);
+ }
+ return result;
+}
+
static char *
do_regex(const char *s, const struct Name *n UNUSED, void *arg)
{
- VarREPattern p2;
VarPattern *p = arg;
- int error;
char *result;
+ VarREPattern *p2;
- error = regcomp(&p2.re, p->lhs, REG_EXTENDED);
- if (error) {
- VarREError(error, &p2.re, "RE substitution error");
+ p2 = string_to_pattern(p->lhs);
+ if (p2->error) {
+ VarREError(p2->error, &p2->re, "RE substitution error");
return var_Error;
}
- p2.nsub = p2.re.re_nsub + 1;
- p2.replace = p->rhs;
- p2.flags = p->flags;
- if (p2.nsub < 1)
- p2.nsub = 1;
- if (p2.nsub > 10)
- p2.nsub = 10;
- p2.matches = ereallocarray(NULL, p2.nsub, sizeof(regmatch_t));
- result = VarModify((char *)s, VarRESubstitute, &p2);
- regfree(&p2.re);
- free(p2.matches);
+ p2->flags = p->flags;
+ p2->replace = p->rhs;
+ result = VarModify((char *)s, VarRESubstitute, p2);
return result;
}
make: caching regexps or not ?