summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christopher Wellons <wellons@nullprogram.com> 2017-10-06 10:17:12 -0400
committer: Christopher Wellons <wellons@nullprogram.com> 2017-10-06 10:17:12 -0400
commit: 0b4b374e733d3eb28051ced0e034e850a4e61834 (patch)
tree: 40b32a8e8a1dedd7af6e005a58cf1df65a5931f5
parent: dca0eda3474931849efc1a5c2516a813c501b5d5 (diff)
Tweak decoder types a bit
-rw-r--r--  Makefile  3
-rw-r--r--  utf8.h  19
2 files changed, 7 insertions, 15 deletions
diff --git a/Makefile b/Makefile
index 2c59e54..86bacb5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
-CC = c99
+CC = cc -std=c99
CFLAGS = -Wall -Wextra -O3 -g3 -march=native
+
all: benchmark tests
benchmark: test/benchmark.c utf8.h test/utf8-encode.h test/bh-utf8.h
diff --git a/utf8.h b/utf8.h
index d359952..1942cb4 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1,5 +1,4 @@
/* Branchless UTF-8 decoder
- * Chris Wellons
*
* This is free and unencumbered software released into the public domain.
*/
@@ -23,18 +22,10 @@ utf8_decode(void *buf, long *c, int *e) {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
};
- static const unsigned char masks[] = {
- 0x00, 0x7f, 0x1f, 0x0f, 0x07
- };
- static const char thresholds[] = {
- 22, 0, 7, 11, 16
- };
- static const char shiftc[] = {
- 0, 18, 12, 6, 0
- };
- static const char shifte[] = {
- 0, 6, 4, 2, 0
- };
+ static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+ static const int thresh[] = {22, 0, 7, 11, 16};
+ static const int shiftc[] = {0, 18, 12, 6, 0};
+ static const int shifte[] = {0, 6, 4, 2, 0};
unsigned char *s = buf;
int len = utf8_lengths[s[0] >> 3];
@@ -45,7 +36,7 @@ utf8_decode(void *buf, long *c, int *e) {
*c |= (s[3] & 0x3fU) << 0;
*c >>= shiftc[len];
- *e = (*c < (1L << thresholds[len]) - 1) << 6;
+ *e = (*c < (1L << thresh[len]) - 1) << 6;
*e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
*e |= (s[1] & 0xc0U) >> 2;
*e |= (s[2] & 0xc0U) >> 4;