summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFredrik Noring <noring@nocrew.org>2019-08-04 08:07:42 +0200
committerMax Kellermann <max@musicpd.org>2019-08-10 10:45:02 +0200
commit7723c481db66f089f5ccde941a0a952cb1b16e2e (patch)
tree7bd82a7c5c32c931d31a656f9e648a0b242e46ec /src
parent0ed10542cc0ef7c590e3b8b9717ae26309dd494a (diff)
decoder/sidplay: Fix windows-1252 to utf-8 string conversion
High Voltage SID Collection (HVSC) metadata fields are encoded in windows-1252, as described in DOCUMENTS/SID_file_format.txt (https://www.hvsc.c64.org/download/C64Music/DOCUMENTS/SID_file_format.txt). If UTF-8 transcoding fails, or the ICU library is unavailable, fall back to plain ASCII and replace all other characters with '?'.
Diffstat (limited to 'src')
-rw-r--r--src/decoder/plugins/SidplayDecoderPlugin.cxx62
1 files changed, 45 insertions, 17 deletions
diff --git a/src/decoder/plugins/SidplayDecoderPlugin.cxx b/src/decoder/plugins/SidplayDecoderPlugin.cxx
index d38010872..619ef38f6 100644
--- a/src/decoder/plugins/SidplayDecoderPlugin.cxx
+++ b/src/decoder/plugins/SidplayDecoderPlugin.cxx
@@ -25,6 +25,7 @@
#include "song/DetachedSong.hxx"
#include "fs/Path.hxx"
#include "fs/AllocatedPath.hxx"
+#include "lib/icu/Converter.hxx"
#ifdef HAVE_SIDPLAYFP
#include "fs/io/FileReader.hxx"
#include "util/RuntimeError.hxx"
@@ -32,6 +33,8 @@
#include "util/Macros.hxx"
#include "util/StringFormat.hxx"
#include "util/Domain.hxx"
+#include "util/AllocatedString.hxx"
+#include "util/CharUtil.hxx"
#include "system/ByteOrder.hxx"
#include "Log.hxx"
@@ -432,19 +435,46 @@ sidplay_file_decode(DecoderClient &client, Path path_fs)
} while (cmd != DecoderCommand::STOP);
}
+static AllocatedString<char>
+Windows1252ToUTF8(const char *s) noexcept
+{
+#ifdef HAVE_ICU_CONVERTER
+ try {
+ std::unique_ptr<IcuConverter>
+ converter(IcuConverter::Create("windows-1252"));
+
+ return converter->ToUTF8(s);
+ } catch (...) { }
+#endif
+
+ /*
+ * Fallback to not transcoding windows-1252 to utf-8, that may result
+ * in invalid utf-8 unless nonprintable characters are replaced.
+ */
+ auto t = AllocatedString<char>::Duplicate(s);
+
+ for (size_t i = 0; t[i] != AllocatedString<char>::SENTINEL; i++)
+ if (!IsPrintableASCII(t[i]))
+ t[i] = '?';
+
+ return t;
+}
+
gcc_pure
-static const char *
+static AllocatedString<char>
GetInfoString(const SidTuneInfo &info, unsigned i) noexcept
{
#ifdef HAVE_SIDPLAYFP
- return info.numberOfInfoStrings() > i
+ const char *s = info.numberOfInfoStrings() > i
? info.infoString(i)
- : nullptr;
+ : "";
#else
- return info.numberOfInfoStrings > i
+ const char *s = info.numberOfInfoStrings > i
? info.infoString[i]
- : nullptr;
+ : "";
#endif
+
+ return Windows1252ToUTF8(s);
}
static void
@@ -452,27 +482,25 @@ ScanSidTuneInfo(const SidTuneInfo &info, unsigned track, unsigned n_tracks,
TagHandler &handler) noexcept
{
/* title */
- const char *title = GetInfoString(info, 0);
- if (title == nullptr)
- title = "";
+ const auto title = GetInfoString(info, 0);
if (n_tracks > 1) {
const auto tag_title =
StringFormat<1024>("%s (%u/%u)",
- title, track, n_tracks);
- handler.OnTag(TAG_TITLE, tag_title);
+ title.c_str(), track, n_tracks);
+ handler.OnTag(TAG_TITLE, tag_title.c_str());
} else
- handler.OnTag(TAG_TITLE, title);
+ handler.OnTag(TAG_TITLE, title.c_str());
/* artist */
- const char *artist = GetInfoString(info, 1);
- if (artist != nullptr)
- handler.OnTag(TAG_ARTIST, artist);
+ const auto artist = GetInfoString(info, 1);
+ if (!artist.empty())
+ handler.OnTag(TAG_ARTIST, artist.c_str());
/* date */
- const char *date = GetInfoString(info, 2);
- if (date != nullptr)
- handler.OnTag(TAG_DATE, date);
+ const auto date = GetInfoString(info, 2);
+ if (!date.empty())
+ handler.OnTag(TAG_DATE, date.c_str());
/* track */
handler.OnTag(TAG_TRACK, StringFormat<16>("%u", track));