diff options
author | Christopher Wellons <wellons@nullprogram.com> | 2017-10-09 18:37:41 -0400 |
---|---|---|
committer | Christopher Wellons <wellons@nullprogram.com> | 2017-10-09 18:52:08 -0400 |
commit | f2d0e24c3864d726cd009901726df4778ad3e0d5 (patch) | |
tree | 6cf9b9dfa7908fef82fb0128a19e9cb3f1d8abb9 | |
parent | ce3f8ac69ece118a88240dfa29554a279e926364 (diff) |
-rw-r--r-- | test/tests.c | 18 | ||||
-rw-r--r-- | utf8.h | 3 |
2 files changed, 19 insertions, 2 deletions
```diff
diff --git a/test/tests.c b/test/tests.c
index be94789..c16576c 100644
--- a/test/tests.c
+++ b/test/tests.c
@@ -23,7 +23,7 @@ main(void)
     /* Make sure it can decode every character */
     {
         long failures = 0;
-        for (unsigned long i = 0; i < 0x1ffff; i++) {
+        for (unsigned long i = 0; i < 0x10ffff; i++) {
             if (!IS_SURROGATE(i)) {
                 int e;
                 uint32_t c;
@@ -36,6 +36,22 @@ main(void)
         TEST(failures == 0, "decode all, errors: %ld", failures);
     }
 
+    /* Reject everything outside of U+0000..U+10FFFF */
+    {
+        long failures = 0;
+        for (unsigned long i = 0x110000; i < 0x1fffff; i++) {
+            int e;
+            uint32_t c;
+            unsigned char buf[8] = {0};
+            utf8_encode(buf, i);
+            unsigned char *end = utf8_decode(buf, &c, &e);
+            failures += !e;
+            failures += end - buf != 4;
+        }
+        TEST(failures == 0, "out of range, errors: %ld", failures);
+    }
+
+
+
     /* Does it reject all surrogate halves? */
     {
         long failures = 0;
diff --git a/utf8.h b/utf8.h
--- a/utf8.h
+++ b/utf8.h
@@ -53,8 +53,9 @@ utf8_decode(void *buf, uint32_t *c, int *e)
     *c >>= shiftc[len];
 
     /* Accumulate the various error conditions. */
-    *e = (*c < mins[len]) << 6;
+    *e = (*c < mins[len]) << 6; // non-canonical encoding
     *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
+    *e |= (*c > 0x10FFFF) << 8; // out of range?
     *e |= (s[1] & 0xc0) >> 2;
     *e |= (s[2] & 0xc0) >> 4;
     *e |= (s[3] ) >> 6;
```