/* You may freely copy, distribute, modify and use this class as long as the original author attribution remains intact. See message below. Copyright (C) 2005 Christian Pesch. All Rights Reserved. */ package slash.metamusic.lyricsdb; import slash.metamusic.lyricwiki.LyricWikiLocator; import slash.metamusic.lyricwiki.LyricWikiPortType; import slash.metamusic.lyricwiki.LyricsResult; import slash.metamusic.util.URLLoader; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.logging.Logger; import static slash.metamusic.util.StringHelper.*; /** * A client that queries lyrics databases. * * @author Christian Pesch */ public class LyricsDBClient { /** * Logging output */ protected static final Logger log = Logger.getLogger(LyricsDBClient.class.getName()); private LyricsDBCache lyricsDBCache = new LyricsDBCache(); public void setLyricsDirectoryName(String lyricsDirectoryName) { lyricsDBCache.setCacheDirectoryName(lyricsDirectoryName); } private static String encode(String request) throws UnsupportedEncodingException { return toMixedCase(request).replace(" ", "_"); } public File getCachedFile(String artist, String track) { try { return lyricsDBCache.getCachedFile(artist, track); } catch (IOException e) { log.severe("Cannot get cached lyrics for artist '" + artist + "' and track '" + track + "': " + e.getMessage()); } return null; } protected String fetchLyrics(String artist, String track, boolean download) { if (artist == null || artist.length() == 0 || artist.toLowerCase().contains("unknown") || track == null || track.length() == 0 || track.toLowerCase().contains("unknown")) { log.severe("Cannot download lyrics for unknown artist '" + artist + "' or unknown track '" + track + "'"); return null; } // TODO strip of (feat. Bla) from track // TODO strip of (radio version) from track String lyrics = null; try { lyrics = lyricsDBCache.peekLyrics(artist, track); if (lyrics != null) { log.info("Lyrics for artist '" + artist + "' and track '" + track + "' (" + lyrics.length() + " bytes) is cached"); } else if (download) { lyrics = scrapeLyrics(artist, track); if (lyrics == null) lyrics = downloadLyrics(artist, track); if (lyrics != null) storeLyrics(artist, track, lyrics); } } catch (IOException e) { log.severe("Cannot fetch lyrics for artist '" + artist + "' and track '" + track + "': " + e.getMessage()); } return trimButKeepLineFeeds(lyrics); } public String fetchLyrics(String artist, String track) { return fetchLyrics(artist, track, true); } public void storeLyrics(String artist, String track, String lyrics) { log.fine("Storing lyrics (" + lyrics.length() + " bytes) for artist '" + artist + "' and track '" + track + "'"); try { lyricsDBCache.storeLyrics(artist, track, lyrics); } catch (IOException e) { log.severe("Cannot store lyrics for artist '" + artist + "' and track '" + track + "': " + e.getMessage()); } } protected String scrapeLyrics(String artist, String track) { try { String spec = "http://lyrics.wikia.com/" + encode(artist) + ":" + encode(track); URL url = new URL(spec); String html = URLLoader.getContents(url, false); html = new String(html.getBytes(), "ISO-8859-1"); return extractLyrics(html); } catch (Exception e) { log.severe("Cannot scrape lyrics: " + e.getMessage()); } return null; } public String cleanLyrics(String lyrics) { if (lyrics == null || lyrics.contains("{{A")) return null; lyrics = lyrics.replaceAll("�", "'"); lyrics = lyrics.replaceAll("\r\n", "\n"); lyrics = lyrics.replaceAll("<br />", "\n"); lyrics = lyrics.replaceAll("\n", "\r\n"); lyrics = lyrics.replaceAll("<b>", "").replaceAll("</b>", ""); lyrics = lyrics.replaceAll("<i>", "").replaceAll("</i>", ""); lyrics = decodeEntities(lyrics); lyrics = trimButKeepLineFeeds(lyrics); int shortened = lyrics.indexOf("[...]"); if (shortened != -1) lyrics = lyrics.substring(0, shortened + 5); int notLicensed = lyrics.indexOf("not licensed to display the full lyrics"); if (notLicensed != -1) lyrics = lyrics.substring(0, notLicensed); if (lyrics.startsWith("Not found")) return null; return lyrics; } String extractLyrics(String html) { String[] before = html.split("width='16' height='17'/></a></div>"); if (before.length > 1) { String[] after = before[1].split("<!--"); if (after.length > 0) { String lyrics = after[0]; lyrics = cleanLyrics(lyrics); if (lyrics != null && !lyrics.contains("<div class='lyricsbreak'>")) return lyrics; } } return null; } protected String downloadLyrics(String artist, String track) { if (lyricsDBCache.hasDownloadAlreadyFailed(artist, track)) { log.fine("Lyrics download already failed for artist '" + artist + "' and track '" + track + "'"); return null; } try { LyricWikiLocator service = new LyricWikiLocator(); LyricWikiPortType port = service.getLyricWikiPort(); LyricsResult lyricsResult = port.getSong(artist, track); if (lyricsResult != null) { String lyrics = lyricsResult.getLyrics(); lyrics = new String(lyrics.getBytes("ISO-8859-1"), "UTF-8"); lyrics = cleanLyrics(lyrics); if (lyrics != null) return lyrics; } } catch (Exception e) { log.severe("Cannot download lyrics: " + e.getMessage()); } log.fine("Lyrics download failed for artist '" + artist + "' and track '" + track + "'"); lyricsDBCache.addFailedDownload(artist, track); return null; } public static void main(String[] args) throws Exception { if (args.length != 2) { System.out.println("slash.metamusic.lyricsdb.LyricsDBClient <artist> <track>"); System.exit(1); } LyricsDBClient client = new LyricsDBClient(); String lyrics = client.downloadLyrics(args[0], args[1]); System.out.println(lyrics); System.exit(0); } }