summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- test/tests.c 18
-rw-r--r-- utf8.h 3
2 files changed, 19 insertions, 2 deletions
diff --git a/test/tests.c b/test/tests.c
index be94789..c16576c 100644
--- a/test/tests.c
+++ b/test/tests.c
@@ -23,7 +23,7 @@ main(void)
/* Make sure it can decode every character */
{
long failures = 0;
- for (unsigned long i = 0; i < 0x1ffff; i++) {
+ for (unsigned long i = 0; i < 0x10ffff; i++) {
if (!IS_SURROGATE(i)) {
int e;
uint32_t c;
@@ -36,6 +36,22 @@ main(void)
TEST(failures == 0, "decode all, errors: %ld", failures);
}
+ /* Reject everything outside of U+0000..U+10FFFF */
+ {
+ long failures = 0;
+ for (unsigned long i = 0x110000; i < 0x1fffff; i++) {
+ int e;
+ uint32_t c;
+ unsigned char buf[8] = {0};
+ utf8_encode(buf, i);
+ unsigned char *end = utf8_decode(buf, &c, &e);
+ failures += !e;
+ failures += end - buf != 4;
+ }
+ TEST(failures == 0, "out of range, errors: %ld", failures);
+ }
+
+
/* Does it reject all surrogate halves? */
{
long failures = 0;
diff --git a/utf8.h b/utf8.h
index 419977d..8c6a7a0 100644
--- a/utf8.h
+++ b/utf8.h
@@ -53,8 +53,9 @@ utf8_decode(void *buf, uint32_t *c, int *e)
*c >>= shiftc[len];
/* Accumulate the various error conditions. */
- *e = (*c < mins[len]) << 6;
+ *e = (*c < mins[len]) << 6; // non-canonical encoding
*e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
+ *e |= (*c > 0x10FFFF) << 8; // out of range?
*e |= (s[1] & 0xc0) >> 2;
*e |= (s[2] & 0xc0) >> 4;
*e |= (s[3] ) >> 6;