Index | Thread | Search

From:
Christian Schulte <cs@schulte.it>
Subject:
Re: [PATCH] amd64: import optimized memcmp from FreeBSD
To:
Stuart Henderson <stu@spacehopper.org>
Cc:
Mateusz Guzik <mjguzik@gmail.com>, deraadt@openbsd.org, tech@openbsd.org
Date:
Tue, 3 Dec 2024 01:38:03 +0100

Download raw body.

Thread
On 12/2/24 11:03, Stuart Henderson wrote:
> On 2024/12/02 07:57, Mateusz Guzik wrote:
>> OpenBSD libc for the amd64 platform uses a C variant of the memcmp
>> routine (as opposed to the kernel which has the problematic rep variant)
> 
> oh, interesting, so bcmp is asm and memcmp is C for amd64!

I noticed this myself. I am quite confused right now, because I also wrote a
bcmp function in C, and the compiler generates faster code for it as well. This
is what I am doing:

cc -c -o c_memcmp.o -pg -O2 c_memcmp.c
cc -c -o c_bcmp.o -pg -O2 c_bcmp.c
cc -c -o test.o -pg test.c
cc -o test -pg -static c_memcmp.o c_bcmp.o test.o

See the files below. The C versions run a tiny bit slower with small length
values because of the two added length checks (>8, >4). For all other values
(>8), the C versions run a lot faster. I doubt the results myself; maybe
"gprof test gmon.out" just displays garbage. Just update the test(0..32) call in
main() of test.c and see the profiling results for yourself. I have not yet
tested these functions extensively; I have only profiled them. I am not very
experienced with C, so if this looks completely incorrect, just forget about it.

-- 
Christian


---- c_memcmp.c ----

#include <sys/types.h>

/*
 * Assemble the eight bytes at p into one 64-bit value.
 *
 * Reading the bytes individually avoids dereferencing a uint64_t pointer
 * that may be misaligned or alias an object of another type.  The result
 * is only compared for equality, so the byte order chosen here does not
 * matter.  (Optimizing compilers typically turn this into a single load.)
 */
static uint64_t
c_memcmp_load64(const unsigned char *p)
{
	return ((uint64_t)p[0] | (uint64_t)p[1] << 8 |
	    (uint64_t)p[2] << 16 | (uint64_t)p[3] << 24 |
	    (uint64_t)p[4] << 32 | (uint64_t)p[5] << 40 |
	    (uint64_t)p[6] << 48 | (uint64_t)p[7] << 56);
}

/* Assemble the four bytes at p into one 32-bit value; see c_memcmp_load64. */
static uint32_t
c_memcmp_load32(const unsigned char *p)
{
	return ((uint32_t)p[0] | (uint32_t)p[1] << 8 |
	    (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
}

/*
 * Compare the first len bytes of b1 and b2.
 *
 * Returns 0 if the two regions are identical; otherwise returns the
 * difference between the first pair of differing bytes, both taken as
 * unsigned char (negative if b1 sorts before b2, positive if after).
 *
 * Equal data is skipped eight bytes at a time, then four bytes at a time;
 * the remaining bytes (or the word in which a difference was detected) are
 * compared one by one to find the exact differing byte.
 */
int
c_memcmp(const void *b1, const void *b2, size_t len)
{
	const unsigned char *p1 = b1, *p2 = b2;

	if (len > 8) {
		do {
			/* Difference is somewhere in this word: locate it bytewise. */
			if (c_memcmp_load64(p1) != c_memcmp_load64(p2))
				goto bytes;
			p1 += 8;
			p2 += 8;
			len -= 8;
		} while (len >= 8);
	}

	if (len > 4) {
		do {
			if (c_memcmp_load32(p1) != c_memcmp_load32(p2))
				goto bytes;
			p1 += 4;
			p2 += 4;
			len -= 4;
		} while (len >= 4);
	}
bytes:
	for (; len != 0; p1++, p2++, len--) {
		if (*p1 != *p2)
			return (*p1 - *p2);
	}
	return (0);
}

---- c_bcmp.c ----
 
#include <sys/types.h>

/*
 * Assemble the eight bytes at p into one 64-bit value.
 *
 * Reading the bytes individually avoids dereferencing a uint64_t pointer
 * that may be misaligned or alias an object of another type.  The result
 * is only compared for equality, so the byte order chosen here does not
 * matter.  (Optimizing compilers typically turn this into a single load.)
 */
static uint64_t
c_bcmp_load64(const uint8_t *p)
{
	return ((uint64_t)p[0] | (uint64_t)p[1] << 8 |
	    (uint64_t)p[2] << 16 | (uint64_t)p[3] << 24 |
	    (uint64_t)p[4] << 32 | (uint64_t)p[5] << 40 |
	    (uint64_t)p[6] << 48 | (uint64_t)p[7] << 56);
}

/* Assemble the four bytes at p into one 32-bit value; see c_bcmp_load64. */
static uint32_t
c_bcmp_load32(const uint8_t *p)
{
	return ((uint32_t)p[0] | (uint32_t)p[1] << 8 |
	    (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
}

/*
 * Test the first len bytes of b1 and b2 for equality.
 *
 * Returns 0 if the two regions are identical (including when len is 0)
 * and -1 if they differ.  Data is compared eight bytes at a time, then
 * four bytes at a time, then bytewise.  Because only equality matters,
 * a differing word ends the comparison immediately.
 */
int
c_bcmp(const void *b1, const void *b2, size_t len)
{
	const uint8_t *p1 = b1, *p2 = b2;

	if (len > 8) {
		do {
			if (c_bcmp_load64(p1) != c_bcmp_load64(p2))
				return (-1);
			p1 += 8;
			p2 += 8;
			len -= 8;
		} while (len >= 8);
	}

	if (len > 4) {
		do {
			if (c_bcmp_load32(p1) != c_bcmp_load32(p2))
				return (-1);
			p1 += 4;
			p2 += 4;
			len -= 4;
		} while (len >= 4);
	}

	for (; len != 0; p1++, p2++, len--) {
		if (*p1 != *p2)
			return (-1);
	}
	return (0);
}

---- test.c ----


#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <err.h>

/* Number of times test() calls each comparison routine. */
#define ITERATIONS 1000000000

/* memcmp replacement under test; defined in c_memcmp.c. */
int
c_memcmp(const void *, const void *, size_t);

/* bcmp replacement under test; defined in c_bcmp.c. */
int
c_bcmp(const void *, const void *, size_t);

/*
 * Call memcmp, c_memcmp, bcmp and c_bcmp ITERATIONS times each on two
 * len-byte buffers, so that their cost can be compared in a profile.
 *
 * Both buffers are filled with 'A'.  When len > 0 the last byte of the
 * second buffer is set to 'B', so the buffers differ only in their final
 * byte and each routine has to look at the whole length.
 *
 * Exits with status 1 if an allocation fails, or if (for len > 0) a
 * memcmp variant does not return a negative value or a bcmp variant
 * returns 0.
 */
void
test(size_t len)
{
	char *b1, *b2;
	int i;

	/*
	 * malloc(0) is allowed to return NULL; ask for at least one byte so
	 * that test(0) is not mistaken for an out-of-memory condition.
	 */
	b1 = malloc(len > 0 ? len : 1);
	if (b1 == NULL)
		err(1, NULL);

	b2 = malloc(len > 0 ? len : 1);
	if (b2 == NULL)
		err(1, NULL);

	memset(b1, 'A', len);
	memset(b2, 'A', len);

	if (len > 0)
		b2[len - 1] = 'B';

	/*
	 * memcmp only promises the sign of its result, not the value -1, so
	 * check for "not negative".  errx is used because errno carries no
	 * information about a wrong comparison result.
	 */
	for (i = ITERATIONS; i > 0; i--)
		if (memcmp(b1, b2, len) >= 0 && len > 0)
			errx(1, "equal");

	for (i = ITERATIONS; i > 0; i--)
		if (c_memcmp(b1, b2, len) >= 0 && len > 0)
			errx(1, "equal");

	for (i = ITERATIONS; i > 0; i--)
		if (bcmp(b1, b2, len) == 0 && len > 0)
			errx(1, "equal");

	for (i = ITERATIONS; i > 0; i--)
		if (c_bcmp(b1, b2, len) == 0 && len > 0)
			errx(1, "equal");

	free(b1);
	free(b2);
}

/*
 * Entry point: run the comparison loops for one buffer length.
 * Change the argument passed to test() to profile a different length.
 */
int
main(void)
{
	test(9);
	return (0);
}