/*
Copyright (C) 2011 Josh Schreuder
This file is part of SMSnatcher.
SMSnatcher is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SMSnatcher is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SMSnatcher. If not, see <http://www.gnu.org/licenses/>.
*/
package model;
import java.io.IOException;
import java.util.HashMap;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes artist and song listings from SongMeanings with Jsoup and stores
 * the results in the maps obtained from {@code DataManager}.
 *
 * <p>All methods are static and share the static {@code artistMap} and
 * {@code songMap} references; the class performs no synchronization itself.
 */
public class SMDataParser {
    /** Timeout handed to Jsoup for every page fetch (Jsoup interprets it as milliseconds). */
    private static final int TIMEOUT_MILLIS = 100000;
    /** Prefix of every artist directory URL; the directory character is appended to it. */
    private static final String DIRECTORY_URL = "http://www.songmeanings.net/artist/directory/";
    /** Suffix appended after the directory character; the page number follows it. */
    private static final String PAGE_QUERY = "/?page=";
    /** Text looked for in the first {@code <strong>} element to detect an unavailable artist. */
    private static final String ARTIST_UNAVAILABLE = "This artist is currently not available.";

    private static HashMap<String, String> artistMap;
    private static HashMap<String, HashMap<String, String>> songMap;

    /**
     * Clears the artist map obtained from {@code DataManager} and refills it by
     * walking every artist directory: first the symbol directories, then the
     * directories {@code A} to {@code Z}.
     */
    public static void parseArtistDirectory() {
        artistMap = DataManager.getArtistMap();
        artistMap.clear();
        Logger.LogToStatusBar("Parsing artists from SongMeanings");
        Logger.LogToStatusBar("Parsing symbol based names");
        // Same character ranges as before: 33-34, 36-45, 91-96 and 123-126.
        // Character 35 ('#') and characters 46-64 are deliberately not visited here.
        parseDirectoryRange('!', '"');
        parseDirectoryRange('$', '-');
        parseDirectoryRange('[', '`');
        parseDirectoryRange('{', '~');
        // Routed through the Logger like every other progress message in this class.
        Logger.LogToStatusBar("Parsing letter based names");
        parseDirectoryRange('A', 'Z');
        Logger.LogToStatusBar("Done parsing SongMeanings artists!");
    }

    /**
     * Parses the artist directory of every character from {@code first} to
     * {@code last}, both inclusive.
     */
    private static void parseDirectoryRange(char first, char last) {
        for (char c = first; c <= last; c++) {
            Logger.LogToStatusBar("Parsing directory " + c);
            parseArtistDirectoryPage(DIRECTORY_URL + c + PAGE_QUERY);
        }
    }

    /**
     * Downloads an artist's page and records every song link found in its song list.
     *
     * @param artist    key under which the songs are stored in the song map
     * @param artistURL URL of the artist's page
     * @return {@code 0} when the page reports that the artist is not available;
     *         {@code 1} otherwise, including when the page could not be loaded
     */
    public static int parseSongsPage(String artist, String artistURL) {
        songMap = DataManager.getSongMap();
        Logger.LogToStatusBar("Attempting to parse song titles from " + artistURL);
        try {
            Document doc = fetch(artistURL);
            // A page without any <strong> element simply has no error banner;
            // guard the lookup so it cannot throw IndexOutOfBoundsException.
            Elements errors = doc.select("strong");
            if (!errors.isEmpty() && errors.get(0).text().contains(ARTIST_UNAVAILABLE)) {
                // This method does not remove the artist itself; it only signals it via 0.
                Logger.LogToStatusBar("There is an error on this page, removing this artist");
                return 0;
            }
            // No errors, get the song list (first cell of every row).
            Elements songRows = doc.select("tbody#songslist tr td:eq(0)");
            extractSongs(artist, songRows);
        }
        catch (IOException e) {
            // Best effort: a page that cannot be loaded is reported and skipped.
            Logger.LogToStatusBar("Songs page for " + artist + " not found!");
        }
        return 1;
    }

    /**
     * Walks the numbered pages of one artist directory, starting at page 1, and
     * stores every artist link found. Stops at the first page that yields no
     * artist rows or that cannot be loaded.
     *
     * @param url directory URL ending in {@code ?page=}; the page number is appended to it
     */
    public static void parseArtistDirectoryPage(String url) {
        int pageNum = 1;
        boolean done = false;
        while (!done) {
            String pageURL = url + pageNum;
            Logger.LogToStatusBar("Parsing page " + pageNum);
            try {
                Document doc = fetch(pageURL);
                Elements artistRows = doc.select("#fan-");
                if (artistRows.isEmpty()) {
                    Logger.LogToStatusBar("No more artists found, bailing...");
                    done = true;
                }
                else {
                    extractArtists(artistRows);
                }
            }
            catch (IOException e) {
                // Best effort: give up on this directory and let the caller continue.
                Logger.LogToStatusBar("Directory " + url + " not found!");
                done = true;
            }
            pageNum += 1;
        }
    }

    /**
     * Stores every link inside {@code rows} in the artist map, keyed by the
     * link text and mapped to the link's absolute URL.
     *
     * @param rows elements that contain the artist links
     */
    public static void extractArtists(Elements rows) {
        // Public entry point: make sure the map exists even when
        // parseArtistDirectory() has not run yet.
        if (artistMap == null) {
            artistMap = DataManager.getArtistMap();
        }
        Elements links = rows.select("a[href]");
        for (Element link : links) {
            String linkURL = link.attr("abs:href");
            String artist = link.text();
            artistMap.put(artist, linkURL);
        }
    }

    /**
     * Stores every link inside {@code rows} in the song map under {@code artist},
     * keyed by the link text and mapped to the raw {@code href} value.
     *
     * @param artist key of the per-artist song table
     * @param rows   elements that contain the song links
     */
    public static void extractSongs(String artist, Elements rows) {
        // Public entry point: make sure the map exists even when
        // parseSongsPage() has not run yet.
        if (songMap == null) {
            songMap = DataManager.getSongMap();
        }
        Elements links = rows.select("a[href]");
        for (Element link : links) {
            // NOTE(review): stored as written in the page (not "abs:href" as for
            // artists) - confirm that consumers expect a possibly relative URL.
            String linkURL = link.attr("href");
            String title = link.text();
            Logger.LogToStatusBar("******************************");
            Logger.LogToStatusBar("Adding \'" + title + "\' to songDB");
            Logger.LogToStatusBar("******************************");
            HashMap<String, String> songs = songMap.get(artist);
            // Artist isn't in songMap yet: register an empty table first.
            if (songs == null) {
                songs = new HashMap<String, String>();
                songMap.put(artist, songs);
            }
            songs.put(title, linkURL);
        }
    }

    /**
     * Loads {@code url} with Jsoup using the shared timeout.
     *
     * @throws IOException when Jsoup fails to fetch the page
     */
    private static Document fetch(String url) throws IOException {
        Connection connection = Jsoup.connect(url);
        connection.timeout(TIMEOUT_MILLIS);
        return connection.get();
    }
}