From: Christian Weisgerber <naddy@mips.inka.de>
Subject: clang: enable blake3 asm optimizations on amd64?
To: tech@openbsd.org
Date: Sat, 17 Feb 2024 20:59:07 +0100

LLVM ships with optimized amd64 assembly code for the BLAKE3 hash
function.  We don't enable this in our clang build, but we easily
could.  The code uses cpuid at runtime to identify the available
instruction set extensions and dispatches the corresponding subroutine.
Straightforward diff below.

From some cursory grepping, I don't think BLAKE3 is used much if
at all in clang.  So I don't know if this buys us anything.  It
does not reduce the time required by clang to compile itself.

Do we want this?


diff refs/heads/master refs/heads/local
commit - fee2627c71f884e2ff97309b73d94287a50bfe1e
commit + 4fd865e3a21f25c8c858311e995ec4c39af4ddc7
blob - 99b3ba2dad319b896e233d0ada87b71121421557
blob + 3580011caf685a4476aa05e6abfc992ee1df7532
--- gnu/usr.bin/clang/libLLVMSupport/Makefile
+++ gnu/usr.bin/clang/libLLVMSupport/Makefile
@@ -5,9 +5,11 @@ NOPIC=
 NOPROFILE=

 CPPFLAGS+=	-I${LLVM_SRCS}/include/llvm/ADT \
-		-I${LLVM_SRCS}/include/llvm/Support \
-		-DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 \
+		-I${LLVM_SRCS}/include/llvm/Support
+.if ${MACHINE_ARCH} != "amd64"
+CPPFLAGS+=	-DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 \
 		-DBLAKE3_NO_SSE41 -DBLAKE3_NO_SSE2
+.endif

 .include <bsd.own.mk>

@@ -158,6 +160,12 @@ SRCS+=	blake3.c \
 	blake3_dispatch.c \
 	blake3_portable.c \
 	blake3_neon.c
+.if ${MACHINE_ARCH} == "amd64"
+SRCS+=	blake3_sse2_x86-64_unix.S \
+	blake3_sse41_x86-64_unix.S \
+	blake3_avx2_x86-64_unix.S \
+	blake3_avx512_x86-64_unix.S
+.endif

 SupportError.cpp:
 	ln -s ${LLVM_SRCS}/lib/Support/Error.cpp $@
blob - 09b376179a5353844f977720b90b8639b6560cad
blob + f2e7f00e94bc6df64d3c558c6a9cba610fbf5387
--- gnu/usr.bin/clang/libLLVMSupport/Makefile.shared
+++ gnu/usr.bin/clang/libLLVMSupport/Makefile.shared
@@ -5,9 +5,11 @@ LIB=	LLVMSupport
 NOPROFILE=

 CPPFLAGS+=	-I${LLVM_SRCS}/include/llvm/ADT \
-		-I${LLVM_SRCS}/include/llvm/Support \
-		-DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 \
+		-I${LLVM_SRCS}/include/llvm/Support
+.if ${MACHINE_ARCH} != "amd64"
+CPPFLAGS+=	-DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 \
 		-DBLAKE3_NO_SSE41 -DBLAKE3_NO_SSE2
+.endif


 SRCS+=	ABIBreak.cpp \
@@ -157,6 +159,12 @@ SRCS+=	blake3.c \
 	blake3_dispatch.c \
 	blake3_portable.c \
 	blake3_neon.c
+.if ${MACHINE_ARCH} == "amd64"
+SRCS+=	blake3_sse2_x86-64_unix.S \
+	blake3_sse41_x86-64_unix.S \
+	blake3_avx2_x86-64_unix.S \
+	blake3_avx512_x86-64_unix.S
+.endif

 SupportError.cpp:
 	ln -s ${LLVM_SRCS}/lib/Support/Error.cpp $@

-- 
Christian "naddy" Weisgerber                          naddy@mips.inka.de