summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrzej Rybczak <electricityispower@gmail.com> 2016-10-30 23:25:51 +0100
committer Andrzej Rybczak <electricityispower@gmail.com> 2016-10-30 23:25:51 +0100
commit 888b6bc1c915cbb7895b79fbc7b88408b820f6ea (patch)
tree 98847f96aa7048f42d0d6dd1741227898d83e567
parent a53c574edd2a13375091dc784643f5be5bbe5ce1 (diff)
lyrics fetcher: fix fetchers and improve formatting of lyrics
-rw-r--r-- NEWS 1
-rw-r--r-- src/lyrics_fetcher.cpp 21
-rw-r--r-- src/lyrics_fetcher.h 4
-rw-r--r-- src/utility/html.cpp 4
4 files changed, 26 insertions, 4 deletions
diff --git a/NEWS b/NEWS
index c0d58002..f8d4dbd1 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ ncmpcpp-0.7.6 (????-??-??)
* Opening playlist editor when there is no MPD playlists directory no longer freezes the application.
* Added info about behavior of MPD_HOST and MPD_PORT environment variables to man page.
* Tilde will now be expanded to home directory in visualizer_fifo_path, execute_on_song_change and external_editor configuration variables.
+* Fixed lyricwiki and justsomelyrics fetchers.
ncmpcpp-0.7.5 (2016-08-17)
* Action chains can be now used for seeking.
diff --git a/src/lyrics_fetcher.cpp b/src/lyrics_fetcher.cpp
index 7e1859fe..f41a9a2b 100644
--- a/src/lyrics_fetcher.cpp
+++ b/src/lyrics_fetcher.cpp
@@ -25,7 +25,9 @@
#include <cstdlib>
#include <cstring>
+#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
+#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/regex.hpp>
@@ -105,6 +107,15 @@ std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std
void LyricsFetcher::postProcess(std::string &data) const
{
stripHtmlTags(data);
+ // Trim each line and collapse runs of empty lines into one; std::unique only shifts elements, so the leftover tail must be erased.
+ std::vector<std::string> lines;
+ boost::split(lines, data, boost::is_any_of("\r\n"));
+ for (auto &line : lines)
+ boost::trim(line);
+ lines.erase(std::unique(lines.begin(), lines.end(), [](const std::string &a, const std::string &b) {
+ return a.empty() && b.empty();
+ }), lines.end());
+ data = boost::algorithm::join(lines, "\n");
boost::trim(data);
}
@@ -126,7 +137,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
return result;
}
- auto lyrics = getContent("<div class='lyricbox'>(.*?)<!--", data);
+ auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
if (lyrics.empty())
{
@@ -224,6 +235,14 @@ void Sing365Fetcher::postProcess(std::string &data) const
/**********************************************************************/
+void JustSomeLyricsFetcher::postProcess(std::string &data) const
+{
+ data = unescapeHtmlUtf8(data); // presumably decodes HTML escapes into UTF-8 text — confirm against the helper's definition
+ LyricsFetcher::postProcess(data); // then apply the shared base-class cleanup to the result
+}
+
+/**********************************************************************/
+
void MetrolyricsFetcher::postProcess(std::string &data) const
{
// some of lyrics have both \n chars and <br />, html tags
diff --git a/src/lyrics_fetcher.h b/src/lyrics_fetcher.h
index b0ae542a..5222381b 100644
--- a/src/lyrics_fetcher.h
+++ b/src/lyrics_fetcher.h
@@ -108,7 +108,9 @@ struct JustSomeLyricsFetcher : public GoogleLyricsFetcher
virtual const char *name() const OVERRIDE { return "justsomelyrics.com"; }
protected:
- virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>(.*?)</div>"; }
+ virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>\\s*</div>(.*?)<div"; }
+
+ virtual void postProcess(std::string &data) const OVERRIDE;
};
struct AzLyricsFetcher : public GoogleLyricsFetcher
diff --git a/src/utility/html.cpp b/src/utility/html.cpp
index d48ce4b4..341cd675 100644
--- a/src/utility/html.cpp
+++ b/src/utility/html.cpp
@@ -66,7 +66,7 @@ void stripHtmlTags(std::string &s)
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
{
size_t j = s.find(">", i)+1;
- if (s.compare(i, j-i, "<p>") == 0 || s.compare(i, j-i, "</p>") == 0)
+ if (s.compare(i, std::min(3ul, j-i), "<p ") == 0 || s.compare(i, j-i, "</p>") == 0)
s.replace(i, j-i, "\n");
else
s.replace(i, j-i, "");
@@ -87,4 +87,4 @@ void stripHtmlTags(std::string &s)
else if (s[i] == '\t')
s[i] = ' ';
}
-} \ No newline at end of file
+}