diff options
author | Max Kellermann <max@duempel.org> | 2009-02-27 09:02:32 +0100 |
---|---|---|
committer | Max Kellermann <max@duempel.org> | 2009-02-27 09:02:32 +0100 |
commit | 497c0b1c186f811f3a088230a2001d04cebd57ef (patch) | |
tree | 84086a89f7129ba73d1759c9d7cf1b346113a4f9 | |
parent | c1ab2d06aaa11823c0310e513b11654cfa67df02 (diff) |
tag: don't accept invalid UTF-8 sequences
Overwrite invalid UTF-8 sequences with question marks.
-rw-r--r-- | src/tag.c | 32 |
1 files changed, 27 insertions, 5 deletions
```diff
@@ -407,24 +407,46 @@ bool tag_equal(const struct tag *tag1, const struct tag *tag2)
 	return true;
 }
 
+/**
+ * Replace invalid sequences with the question mark.
+ */
+static char *
+patch_utf8(const char *src, size_t length, const gchar *end)
+{
+	/* duplicate the string, and replace invalid bytes in that
+	   buffer */
+	char *dest = g_strdup(src);
+
+	do {
+		dest[end - src] = '?';
+	} while (!g_utf8_validate(end + 1, (src + length) - (end + 1), &end));
+
+	return dest;
+}
+
 static char *
 fix_utf8(const char *str, size_t length)
 {
+	const gchar *end;
 	char *temp;
 	gsize written;
 
 	assert(str != NULL);
 
-	if (g_utf8_validate(str, length, NULL))
+	/* check if the string is already valid UTF-8 */
+	if (g_utf8_validate(str, length, &end))
 		return NULL;
 
-	DEBUG("not valid utf8 in tag: %s\n",str);
+	/* no, it's not - try to import it from ISO-Latin-1 */
 	temp = g_convert(str, length, "utf-8", "iso-8859-1",
 			 NULL, &written, NULL);
-	if (temp == NULL)
-		return NULL;
+	if (temp != NULL)
+		/* success! */
+		return temp;
 
-	return temp;
+	/* no, still broken - there's no medication, just patch
+	   invalid sequences */
+	return patch_utf8(str, length, end);
 }
 
 void tag_begin_add(struct tag *tag)
```