From: Marc Espie Subject: make: caching regexps or not ? To: tech@openbsd.org Date: Fri, 14 Jun 2024 14:51:53 +0200 The following patch has make indexing :C regexps it sees and compiling them just once. I was hoping to get a significant speed-up, but at least for the regexp expensive go module in ports, it doesn't since to change significant timings. Maybe it's better elsewhere ? I think it's fairly straightforward and *could* speed up complex compilations. For what it's worth, bsd.port.mk, in sysutils/telegraf, encounter >3500 regexps while expanding the loop, but they're mostly duplicates. I end up with 19 different regexps ! Index: varmodifiers.c =================================================================== RCS file: /build/data/openbsd/cvs/src/usr.bin/make/varmodifiers.c,v diff -u -p -r1.49 varmodifiers.c --- varmodifiers.c 4 Sep 2023 11:35:11 -0000 1.49 +++ varmodifiers.c 14 Jun 2024 09:13:39 -0000 @@ -73,6 +73,7 @@ #include #include #include +#include #include "defines.h" #include "buf.h" #include "var.h" @@ -127,8 +128,19 @@ typedef struct { regmatch_t *matches; char *replace; int flags; + int error; + char source[1]; } VarREPattern; +static struct ohash cache; /* hash table of regexps */ +struct ohash_info cache_info = { + offsetof(VarREPattern, source), NULL, hash_calloc, hash_free, + element_alloc +}; + +static VarREPattern *string_to_pattern(const char *); +static VarREPattern *create_pattern(const char *, const char *); + static bool VarSubstitute(struct Name *, bool, Buffer, void *); static char *VarGetPattern(SymTable *, int, const char **, int, int, size_t *, VarPattern *); @@ -188,6 +200,7 @@ VarModifiers_Init() choose_mod['U'] = &upper_mod; choose_mod['L'] = &lower_mod; choose_mod['s'] = &shcmd_mod; + ohash_init(&cache, 10, &cache_info); } /* All modifiers handle addSpace (need to add a space before placing the @@ -1042,30 +1055,60 @@ free_patternarg(void *p) free(vp); } +static VarREPattern * +create_pattern(const char *source, const char *esource) +{ + VarREPattern *p; + + p = ohash_create_entry(&cache_info, source, &esource); + p->error = regcomp(&(p->re), source, REG_EXTENDED); + if (!p->error) { + p->nsub = p->re.re_nsub + 1; + if (p->nsub < 1) + p->nsub = 1; + if (p->nsub > 10) + p->nsub = 10; + p->matches = ereallocarray(NULL, p->nsub, sizeof(regmatch_t)); + } + return p; +} + +static VarREPattern * +string_to_pattern(const char *source) +{ + uint32_t hv; + unsigned int slot; + VarREPattern *result; + const char *esource = NULL; + + hv = ohash_interval(source, &esource); + + slot = ohash_lookup_interval(&cache, source, esource, hv); + + result = ohash_find(&cache, slot); + if (result == NULL) { + result = create_pattern(source, esource); + if (result != NULL) + ohash_insert(&cache, slot, result); + } + return result; +} + static char * do_regex(const char *s, const struct Name *n UNUSED, void *arg) { - VarREPattern p2; VarPattern *p = arg; - int error; char *result; + VarREPattern *p2; - error = regcomp(&p2.re, p->lhs, REG_EXTENDED); - if (error) { - VarREError(error, &p2.re, "RE substitution error"); + p2 = string_to_pattern(p->lhs); + if (p2->error) { + VarREError(p2->error, &p2->re, "RE substitution error"); return var_Error; } - p2.nsub = p2.re.re_nsub + 1; - p2.replace = p->rhs; - p2.flags = p->flags; - if (p2.nsub < 1) - p2.nsub = 1; - if (p2.nsub > 10) - p2.nsub = 10; - p2.matches = ereallocarray(NULL, p2.nsub, sizeof(regmatch_t)); - result = VarModify((char *)s, VarRESubstitute, &p2); - regfree(&p2.re); - free(p2.matches); + p2->flags = p->flags; + p2->replace = p->rhs; + result = VarModify((char *)s, VarRESubstitute, p2); return result; }