summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christopher Wellons <wellons@nullprogram.com> 2017-10-05 23:36:13 -0400
committer: Christopher Wellons <wellons@nullprogram.com> 2017-10-05 23:36:13 -0400
commit: 79fb5c8d086b5c9c96a96b1bb07de227793a6fb1 (patch)
tree: 3c4e3dcdefb59281c2c296d2feff8352e3616ad4
parent: fa01ab965478fd4286ae9a70d0aad952cbc4c13e (diff)
Add some comments
-rw-r--r-- test/benchmark.c 4
-rw-r--r-- utf8.h 4
2 files changed, 6 insertions, 2 deletions
diff --git a/test/benchmark.c b/test/benchmark.c
index 2914a50..a94ebfc 100644
--- a/test/benchmark.c
+++ b/test/benchmark.c
@@ -23,6 +23,7 @@ pcg32(uint64_t *s)
return *s >> shift;
}
+/* Generate a random codepoint whose UTF-8 length is uniformly selected. */
static long
randchar(uint64_t *s)
{
@@ -51,6 +52,9 @@ alarm_handler(int signum)
running = 0;
}
+/* Fill buffer with random characters, with evenly-distributed encoded
+ * lengths.
+ */
static void *
buffer_fill(void *buf, size_t z)
{
diff --git a/utf8.h b/utf8.h
index 66d7840..257dfa3 100644
--- a/utf8.h
+++ b/utf8.h
@@ -46,11 +46,11 @@ utf8_decode(void *buf, long *c, int *e) {
*c >>= shiftc[len];
*e = (*c < (1L << thresholds[len]) - 1) << 6;
- *e |= ((*c >> 11) == 0x1b) << 7;
+ *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
*e |= (s[1] & 0xc0U) >> 2;
*e |= (s[2] & 0xc0U) >> 4;
*e |= (s[3] ) >> 6;
- *e ^= 0x2aU;
+ *e ^= 0x2aU; // top two bits of each tail byte correct?
*e >>= shifte[len];
return s + len;