From da86c3076c0adf5d0ae8026b3a44942def657b6a Mon Sep 17 00:00:00 2001
From: Christopher Wellons
Date: Sun, 8 Oct 2017 13:12:37 -0400
Subject: Add simple decoder to the benchmarks

---
 test/benchmark.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/test/benchmark.c b/test/benchmark.c
index 4eb51da..4e93408 100644
--- a/test/benchmark.c
+++ b/test/benchmark.c
@@ -71,9 +71,50 @@ buffer_fill(void *buf, size_t z)
     return p;
 }
 
+static unsigned char *
+utf8_simple(unsigned char *s, long *c)
+{
+    unsigned char *next;
+    if (s[0] < 0x80) {
+        *c = s[0];
+        next = s + 1;
+    } else if ((s[0] & 0xe0) == 0xc0) {
+        *c = ((long)(s[0] & 0x1f) << 6) |
+             ((long)(s[1] & 0x3f) << 0);
+        if ((s[1] & 0xc0) != 0x80)
+            *c = -1;
+        next = s + 2;
+    } else if ((s[0] & 0xf0) == 0xe0) {
+        *c = ((long)(s[0] & 0x0f) << 12) |
+             ((long)(s[1] & 0x3f) << 6) |
+             ((long)(s[2] & 0x3f) << 0);
+        if ((s[1] & 0xc0) != 0x80 ||
+            (s[2] & 0xc0) != 0x80)
+            *c = -1;
+        next = s + 3;
+    } else if ((s[0] & 0xf8) == 0xf0 && (s[0] <= 0xf4)) {
+        *c = ((long)(s[0] & 0x07) << 18) |
+             ((long)(s[1] & 0x3f) << 12) |
+             ((long)(s[2] & 0x3f) << 6) |
+             ((long)(s[3] & 0x3f) << 0);
+        if ((s[1] & 0xc0) != 0x80 ||
+            (s[2] & 0xc0) != 0x80 ||
+            (s[3] & 0xc0) != 0x80)
+            *c = -1;
+        next = s + 4;
+    } else {
+        *c = -1; // invalid
+        next = s + 1; // skip this byte
+    }
+    if (*c >= 0xd800 && *c <= 0xdfff)
+        *c = -1; // surrogate half
+    return next;
+}
+
 int
 main(void)
 {
+    double rate;
     long errors, n;
     size_t z = BUFLEN * 1024L * 1024;
     unsigned char *buffer = malloc(z);
@@ -97,8 +138,7 @@ main(void)
         if (p == end) // reached the end successfully?
             n++;
     } while (running);
-
-    double rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
+    rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
     printf("branchless: %f MB/s, %ld errors\n", rate, errors);
 
     /* Benchmark Bjoern Hoehrmann's decoder */
@@ -120,9 +160,29 @@ main(void)
         if (p == end) // reached the end successfully?
             n++;
     } while (running);
-
     rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
     printf("Hoehrmann: %f MB/s, %ld errors\n", rate, errors);
 
+    /* Benchmark simple decoder */
+    running = 1;
+    signal(SIGALRM, alarm_handler);
+    alarm(SECONDS);
+    errors = n = 0;
+    do {
+        unsigned char *p = buffer;
+        long c;
+        long count = 0;
+        while (p < end) {
+            p = utf8_simple(p, &c);
+            count++;
+            if (c < 0)
+                errors++;
+        }
+        if (p == end) // reached the end successfully?
+            n++;
+    } while (running);
+    rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
+    printf("Simple: %f MB/s, %ld errors\n", rate, errors);
+
     free(buffer);
 }
-- 
cgit v1.2.3