summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Max Kellermann <max@duempel.org> 2009-02-27 09:02:32 +0100
committer Max Kellermann <max@duempel.org> 2009-02-27 09:02:32 +0100
commit 497c0b1c186f811f3a088230a2001d04cebd57ef (patch)
tree 84086a89f7129ba73d1759c9d7cf1b346113a4f9
parent c1ab2d06aaa11823c0310e513b11654cfa67df02 (diff)
tag: don't accept invalid UTF-8 sequences
Overwrite invalid UTF-8 sequences with question marks.
-rw-r--r-- src/tag.c 32
1 files changed, 27 insertions, 5 deletions
diff --git a/src/tag.c b/src/tag.c
index 0e4b92373..960dd4b17 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -407,24 +407,46 @@ bool tag_equal(const struct tag *tag1, const struct tag *tag2)
return true;
}
+/**
+ * Replace invalid sequences with the question mark.
+ */
+static char *
+patch_utf8(const char *src, size_t length, const gchar *end)
+{
+ /* duplicate the string, and replace invalid bytes in that
+ buffer */
+ char *dest = g_strdup(src);
+
+ do {
+ dest[end - src] = '?';
+ } while (!g_utf8_validate(end + 1, (src + length) - (end + 1), &end));
+
+ return dest;
+}
+
static char *
fix_utf8(const char *str, size_t length)
{
+ const gchar *end;
char *temp;
gsize written;
assert(str != NULL);
- if (g_utf8_validate(str, length, NULL))
+ /* check if the string is already valid UTF-8 */
+ if (g_utf8_validate(str, length, &end))
return NULL;
- DEBUG("not valid utf8 in tag: %s\n",str);
+ /* no, it's not - try to import it from ISO-Latin-1 */
temp = g_convert(str, length, "utf-8", "iso-8859-1",
NULL, &written, NULL);
- if (temp == NULL)
- return NULL;
+ if (temp != NULL)
+ /* success! */
+ return temp;
- return temp;
+ /* no, still broken - there's no medication, just patch
+ invalid sequences */
+ return patch_utf8(str, length, end);
}
void tag_begin_add(struct tag *tag)