diff options
author | Fredrik Noring <noring@nocrew.org> | 2019-08-04 08:07:42 +0200 |
---|---|---|
committer | Max Kellermann <max@musicpd.org> | 2019-08-10 10:45:02 +0200 |
commit | 7723c481db66f089f5ccde941a0a952cb1b16e2e (patch) | |
tree | 7bd82a7c5c32c931d31a656f9e648a0b242e46ec /src | |
parent | 0ed10542cc0ef7c590e3b8b9717ae26309dd494a (diff) |
decoder/sidplay: Fix windows-1252 to utf-8 string conversion
High Voltage SID Collection (HVSC) metadata fields are encoded in
windows-1252, as described in DOCUMENTS/SID_file_format.txt:
https://www.hvsc.c64.org/download/C64Music/DOCUMENTS/SID_file_format.txt
If UTF-8 transcoding fails, or the ICU library is unavailable, fall back to
plain ASCII, replacing every character that is not printable ASCII with '?'.
Diffstat (limited to 'src')
-rw-r--r-- | src/decoder/plugins/SidplayDecoderPlugin.cxx | 62 |
1 file changed, 45 insertions, 17 deletions
diff --git a/src/decoder/plugins/SidplayDecoderPlugin.cxx b/src/decoder/plugins/SidplayDecoderPlugin.cxx index d38010872..619ef38f6 100644 --- a/src/decoder/plugins/SidplayDecoderPlugin.cxx +++ b/src/decoder/plugins/SidplayDecoderPlugin.cxx @@ -25,6 +25,7 @@ #include "song/DetachedSong.hxx" #include "fs/Path.hxx" #include "fs/AllocatedPath.hxx" +#include "lib/icu/Converter.hxx" #ifdef HAVE_SIDPLAYFP #include "fs/io/FileReader.hxx" #include "util/RuntimeError.hxx" @@ -32,6 +33,8 @@ #include "util/Macros.hxx" #include "util/StringFormat.hxx" #include "util/Domain.hxx" +#include "util/AllocatedString.hxx" +#include "util/CharUtil.hxx" #include "system/ByteOrder.hxx" #include "Log.hxx" @@ -432,19 +435,46 @@ sidplay_file_decode(DecoderClient &client, Path path_fs) } while (cmd != DecoderCommand::STOP); } +static AllocatedString<char> +Windows1252ToUTF8(const char *s) noexcept +{ +#ifdef HAVE_ICU_CONVERTER + try { + std::unique_ptr<IcuConverter> + converter(IcuConverter::Create("windows-1252")); + + return converter->ToUTF8(s); + } catch (...) { } +#endif + + /* + * Fallback to not transcoding windows-1252 to utf-8, that may result + * in invalid utf-8 unless nonprintable characters are replaced. + */ + auto t = AllocatedString<char>::Duplicate(s); + + for (size_t i = 0; t[i] != AllocatedString<char>::SENTINEL; i++) + if (!IsPrintableASCII(t[i])) + t[i] = '?'; + + return t; +} + gcc_pure -static const char * +static AllocatedString<char> GetInfoString(const SidTuneInfo &info, unsigned i) noexcept { #ifdef HAVE_SIDPLAYFP - return info.numberOfInfoStrings() > i + const char *s = info.numberOfInfoStrings() > i ? info.infoString(i) - : nullptr; + : ""; #else - return info.numberOfInfoStrings > i + const char *s = info.numberOfInfoStrings > i ? 
info.infoString[i] - : nullptr; + : ""; #endif + + return Windows1252ToUTF8(s); } static void @@ -452,27 +482,25 @@ ScanSidTuneInfo(const SidTuneInfo &info, unsigned track, unsigned n_tracks, TagHandler &handler) noexcept { /* title */ - const char *title = GetInfoString(info, 0); - if (title == nullptr) - title = ""; + const auto title = GetInfoString(info, 0); if (n_tracks > 1) { const auto tag_title = StringFormat<1024>("%s (%u/%u)", - title, track, n_tracks); - handler.OnTag(TAG_TITLE, tag_title); + title.c_str(), track, n_tracks); + handler.OnTag(TAG_TITLE, tag_title.c_str()); } else - handler.OnTag(TAG_TITLE, title); + handler.OnTag(TAG_TITLE, title.c_str()); /* artist */ - const char *artist = GetInfoString(info, 1); - if (artist != nullptr) - handler.OnTag(TAG_ARTIST, artist); + const auto artist = GetInfoString(info, 1); + if (!artist.empty()) + handler.OnTag(TAG_ARTIST, artist.c_str()); /* date */ - const char *date = GetInfoString(info, 2); - if (date != nullptr) - handler.OnTag(TAG_DATE, date); + const auto date = GetInfoString(info, 2); + if (!date.empty()) + handler.OnTag(TAG_DATE, date.c_str()); /* track */ handler.OnTag(TAG_TRACK, StringFormat<16>("%u", track)); |