summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
authorMax Kellermann <max@duempel.org>2014-05-12 14:35:25 +0200
committerMax Kellermann <max@duempel.org>2014-05-12 14:43:30 +0200
commit41507d81291e215b2aec6dfcb9bc7ed7063853d5 (patch)
treedcac711d385dbd16036661a640fa545d5385651f /src/lib
parent317a98a5a9b1b2a2fbffd23925b5075338748700 (diff)
icu/Collate: use u_strFoldCase() instead of ucol_getSortKey()
Turns out ucol_getSortKey() does not do what I thought it does.
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/icu/Collate.cxx51
1 files changed, 41 insertions, 10 deletions
diff --git a/src/lib/icu/Collate.cxx b/src/lib/icu/Collate.cxx
index fac73783b..4d1526b28 100644
--- a/src/lib/icu/Collate.cxx
+++ b/src/lib/icu/Collate.cxx
@@ -23,6 +23,7 @@
#ifdef HAVE_ICU
#include "Error.hxx"
#include "util/WritableBuffer.hxx"
+#include "util/ConstBuffer.hxx"
#include "util/Error.hxx"
#include "util/Domain.hxx"
@@ -97,6 +98,28 @@ UCharFromUTF8(const char *src)
return { dest, size_t(dest_length) };
}
+static WritableBuffer<char> /* Convert a UTF-16 (UChar) buffer to UTF-8 via u_strToUTF8().
+ * On success, returns a buffer allocated with new char[] whose size is
+ * the length reported by u_strToUTF8(); the caller must delete[] its data.
+ * On conversion failure, returns nullptr (a null buffer). */
+UCharToUTF8(ConstBuffer<UChar> src)
+{
+ assert(!src.IsNull());
+
+ /* worst-case estimate: 4 bytes per UChar is a generous upper bound
+ (one UTF-16 code unit encodes to at most 3 UTF-8 bytes, and a
+ surrogate pair of two units to 4) */
+ size_t dest_capacity = 4 * src.size;
+
+ char *dest = new char[dest_capacity];
+
+ UErrorCode error_code = U_ZERO_ERROR;
+ int32_t dest_length;
+ u_strToUTF8(dest, dest_capacity, &dest_length, src.data, src.size,
+ &error_code);
+ if (U_FAILURE(error_code)) {
+ delete[] dest; /* don't leak the output buffer on failure */
+ return nullptr;
+ }
+
+ return { dest, size_t(dest_length) };
+}
+
#endif
gcc_pure
@@ -147,19 +170,27 @@ IcuCaseFold(const char *src)
if (u.IsNull())
return std::string(src);
- size_t dest_length = ucol_getSortKey(collator, u.data, u.size,
- nullptr, 0);
- if (dest_length == 0) {
- delete[] u.data;
+ size_t folded_capacity = u.size * 2u;
+ UChar *folded = new UChar[folded_capacity];
+
+ UErrorCode error_code = U_ZERO_ERROR;
+ size_t folded_length = u_strFoldCase(folded, folded_capacity,
+ u.data, u.size,
+ U_FOLD_CASE_DEFAULT,
+ &error_code);
+ delete[] u.data;
+ if (folded_length == 0 || error_code != U_ZERO_ERROR) {
+ delete[] folded;
return std::string(src);
}
- uint8_t *dest = new uint8_t[dest_length];
- ucol_getSortKey(collator, u.data, u.size,
- dest, dest_length);
- delete[] u.data;
- std::string result((const char *)dest);
- delete[] dest;
+ auto result2 = UCharToUTF8({folded, folded_length});
+ delete[] folded;
+ if (result2.IsNull())
+ return std::string(src);
+
+ std::string result(result2.data, result2.size);
+ delete[] result2.data;
#elif defined(HAVE_GLIB)
char *tmp = g_utf8_casefold(src, -1);
std::string result(tmp);