From 0b4b374e733d3eb28051ced0e034e850a4e61834 Mon Sep 17 00:00:00 2001
From: Christopher Wellons
Date: Fri, 6 Oct 2017 10:17:12 -0400
Subject: Tweak decoder types a bit

---
 Makefile |  3 ++-
 utf8.h   | 19 +++++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/Makefile b/Makefile
index 2c59e54..86bacb5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
-CC = c99
+CC = cc -std=c99
 CFLAGS = -Wall -Wextra -O3 -g3 -march=native
+
 all: benchmark tests
 
 benchmark: test/benchmark.c utf8.h test/utf8-encode.h test/bh-utf8.h
diff --git a/utf8.h b/utf8.h
index d359952..1942cb4 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1,5 +1,4 @@
 /* Branchless UTF-8 decoder
- * Chris Wellons
  *
  * This is free and unencumbered software released into the public domain.
  */
@@ -23,18 +22,10 @@ utf8_decode(void *buf, long *c, int *e) {
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
     };
-    static const unsigned char masks[] = {
-        0x00, 0x7f, 0x1f, 0x0f, 0x07
-    };
-    static const char thresholds[] = {
-        22, 0, 7, 11, 16
-    };
-    static const char shiftc[] = {
-        0, 18, 12, 6, 0
-    };
-    static const char shifte[] = {
-        0, 6, 4, 2, 0
-    };
+    static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+    static const int thresh[] = {22, 0, 7, 11, 16};
+    static const int shiftc[] = {0, 18, 12, 6, 0};
+    static const int shifte[] = {0, 6, 4, 2, 0};
 
     unsigned char *s = buf;
     int len = utf8_lengths[s[0] >> 3];
@@ -45,7 +36,7 @@ utf8_decode(void *buf, long *c, int *e) {
     *c |= (s[3] & 0x3fU) << 0;
     *c >>= shiftc[len];
 
-    *e = (*c < (1L << thresholds[len]) - 1) << 6;
+    *e = (*c < (1L << thresh[len]) - 1) << 6;
     *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
     *e |= (s[1] & 0xc0U) >> 2;
     *e |= (s[2] & 0xc0U) >> 4;
-- 
cgit v1.2.3