diff options
author | Andrzej Rybczak <electricityispower@gmail.com> | 2016-10-30 23:25:51 +0100 |
---|---|---|
committer | Andrzej Rybczak <electricityispower@gmail.com> | 2016-10-30 23:25:51 +0100 |
commit | 888b6bc1c915cbb7895b79fbc7b88408b820f6ea (patch) | |
tree | 98847f96aa7048f42d0d6dd1741227898d83e567 | |
parent | a53c574edd2a13375091dc784643f5be5bbe5ce1 (diff) |
lyrics fetcher: fix fetchers and improve formatting of lyrics
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | src/lyrics_fetcher.cpp | 21 | ||||
-rw-r--r-- | src/lyrics_fetcher.h | 4 | ||||
-rw-r--r-- | src/utility/html.cpp | 4 |
4 files changed, 26 insertions, 4 deletions
@@ -5,6 +5,7 @@ ncmpcpp-0.7.6 (????-??-??)
 * Opening playlist editor when there is no MPD playlists directory no longer freezes the application.
 * Added info about behavior of MPD_HOST and MPD_PORT environment variables to man page.
 * Tilde will now be expanded to home directory in visualizer_fifo_path, execute_on_song_change and external_editor configuration variables.
+* Fixed lyricwiki and justsomelyrics fetchers.
 
 ncmpcpp-0.7.5 (2016-08-17)
 * Action chains can be now used for seeking.
diff --git a/src/lyrics_fetcher.cpp b/src/lyrics_fetcher.cpp
index 7e1859fe..f41a9a2b 100644
--- a/src/lyrics_fetcher.cpp
+++ b/src/lyrics_fetcher.cpp
@@ -25,7 +25,9 @@
 #include <cstdlib>
 #include <cstring>
 
+#include <boost/algorithm/string/join.hpp>
 #include <boost/algorithm/string/replace.hpp>
+#include <boost/algorithm/string/split.hpp>
 #include <boost/algorithm/string/trim.hpp>
 #include <boost/regex.hpp>
 
@@ -105,6 +107,15 @@ std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std
 void LyricsFetcher::postProcess(std::string &data) const
 {
 	stripHtmlTags(data);
+	// Remove indentation from each line and collapse multiple newlines into one.
+	std::vector<std::string> lines;
+	boost::split(lines, data, boost::is_any_of("\r\n"));
+	for (auto &line : lines)
+		boost::trim(line);
+	std::unique(lines.begin(), lines.end(), [](std::string &a, std::string &b) {
+		return a.empty() && b.empty();
+	});
+	data = boost::algorithm::join(lines, "\n");
 	boost::trim(data);
 }
 
@@ -126,7 +137,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
 		return result;
 	}
 	
-	auto lyrics = getContent("<div class='lyricbox'>(.*?)<!--", data);
+	auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
 	
 	if (lyrics.empty())
 	{
@@ -224,6 +235,14 @@ void Sing365Fetcher::postProcess(std::string &data) const
 
 /**********************************************************************/
 
+void JustSomeLyricsFetcher::postProcess(std::string &data) const
+{
+	data = unescapeHtmlUtf8(data);
+	LyricsFetcher::postProcess(data);
+}
+
+/**********************************************************************/
+
 void MetrolyricsFetcher::postProcess(std::string &data) const
 {
 	// some of lyrics have both \n chars and <br />, html tags
diff --git a/src/lyrics_fetcher.h b/src/lyrics_fetcher.h
index b0ae542a..5222381b 100644
--- a/src/lyrics_fetcher.h
+++ b/src/lyrics_fetcher.h
@@ -108,7 +108,9 @@ struct JustSomeLyricsFetcher : public GoogleLyricsFetcher
 	virtual const char *name() const OVERRIDE { return "justsomelyrics.com"; }
 	
 protected:
-	virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>(.*?)</div>"; }
+	virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>\\s*</div>(.*?)<div"; }
+	
+	virtual void postProcess(std::string &data) const OVERRIDE;
 };
 
 struct AzLyricsFetcher : public GoogleLyricsFetcher
diff --git a/src/utility/html.cpp b/src/utility/html.cpp
index d48ce4b4..341cd675 100644
--- a/src/utility/html.cpp
+++ b/src/utility/html.cpp
@@ -66,7 +66,7 @@ void stripHtmlTags(std::string &s)
 	for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
 	{
 		size_t j = s.find(">", i)+1;
-		if (s.compare(i, j-i, "<p>") == 0 || s.compare(i, j-i, "</p>") == 0)
+		if (s.compare(i, std::min(3ul, j-i), "<p ") == 0 || s.compare(i, j-i, "</p>") == 0)
 			s.replace(i, j-i, "\n");
 		else
 			s.replace(i, j-i, "");
@@ -87,4 +87,4 @@ void stripHtmlTags(std::string &s)
 		else if (s[i] == '\t')
 			s[i] = ' ';
 	}
-}
\ No newline at end of file
+}