package music.search;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import music.info.MusicInfo;
/**
 * {@link MusicSearcher} that extracts song data from Google Music (google.cn)
 * HTML pages using regular expressions.
 *
 * <p>{@link #getMusicList(String)} parses a search result page into
 * {@link MusicInfo} entries and {@link #getDownloadUrl(String, MusicInfo)}
 * completes an entry from its download page. The static helpers clean up the
 * individual text fragments captured by the patterns.
 */
public class GoogleMusicSearcher extends MusicSearcher
{
    /** Matches one {@code <tbody>} section; group 1 is everything between {@code <tbody} and {@code </tbody>}. */
    private static final Pattern ROW_PATTERN = Pattern.compile("<tbody(.*?)</tbody>", Pattern.DOTALL);

    /**
     * Matches one song inside a {@code <tbody>} section.
     * Groups: 1 = title, 2 = parenthesised artist list, 3 = album, 4 = encoded song URL.
     */
    private static final Pattern PATTERN = Pattern.compile(
        "<td\\sclass=\"Title.*?\"><a.*?\">(.*?)</a>\\s</td>.*?" + // Title
        "(\\(.*?\\))</a></td>.*?" + // Artist
        "《(.*?)》.*?" + // Album
        "(http://g.top.*?).{2}26resnum" // url
        , Pattern.DOTALL);

    /** Matches one parenthesised artist name; group 1 is the text between the parentheses. */
    private static final Pattern MULTI_ARTIST_PATTERN = Pattern.compile("\\((.*?)\\)", Pattern.DOTALL);

    /**
     * Matches the relevant parts of a download page.
     * Groups: 1 = image URL (captured but currently unused), 2 = file size text, 3 = encoded download URL.
     */
    private static final Pattern DOWN_PATTERN = Pattern.compile(
        "src=\"(http://lh.*?)\".*?" + // ignore
        "td-size\">([0-9].*?)</td>.*?" + // size
        "q=(.*?)&", // ring url
        Pattern.DOTALL);

    /** Matches a decimal numeric character reference; the trailing semicolon is optional. */
    private static final Pattern NUMERIC_ENTITY_PATTERN = Pattern.compile("&#([0-9]+);?");

    private static final String GOOGLE_BASE_URL = "http://www.google.cn";
    private static final String DOWNLOAD_BASE_URL = GOOGLE_BASE_URL + "/music/top100/musicdownload?";

    /**
     * Creates a searcher, handing the Google Music search URL prefix and the
     * charset name {@code "utf-8"} to the superclass.
     */
    public GoogleMusicSearcher()
    {
        super(GOOGLE_BASE_URL + "/music/search?q=", "utf-8");
    }

    /**
     * Extracts the songs listed on a search result page.
     *
     * <p>Every {@code <tbody>} section is scanned with {@link #PATTERN}. Each
     * match becomes a {@link MusicInfo} whose URL is the download base URL
     * followed by the part of the captured song URL that starts at
     * {@code "id"}. An entry is added only when {@code inList} reports that
     * it is not in the result list yet.
     *
     * @param listPage HTML of the search result page
     * @return the extracted songs, possibly empty, never {@code null}
     * @throws Exception if one of the text helpers fails
     */
    @Override
    public ArrayList<MusicInfo> getMusicList(String listPage) throws Exception
    {
        ArrayList<MusicInfo> musicList = new ArrayList<MusicInfo>();
        Matcher row = ROW_PATTERN.matcher(listPage);
        while (row.find())
        {
            Matcher music = PATTERN.matcher(row.group(1));
            while (music.find())
            {
                String encodedUrl = music.group(4);
                int idStart = encodedUrl.indexOf("id");
                if (idStart < 0)
                {
                    // Without an "id" part no download URL can be built, so skip
                    // this match instead of failing on substring(-1).
                    continue;
                }

                MusicInfo info = new MusicInfo();
                info.setTitle(procString(music.group(1)));
                info.setArtist(procArtist(music.group(2)));
                info.setAlbum(procString(music.group(3)));
                info.setUrl(decodeUtf8(DOWNLOAD_BASE_URL + encodedUrl.substring(idStart)));
                if (!inList(musicList, info))
                {
                    musicList.add(info);
                }
            }
        }
        return musicList;
    }

    /**
     * Turns a run of parenthesised artist names into a comma separated list.
     *
     * @param orig text containing one or more {@code (name)} groups
     * @return the names joined by {@code ","} with numeric character
     *         references decoded; empty when no group is found
     * @throws Exception declared for compatibility with existing callers
     */
    public static String procArtist(String orig) throws Exception
    {
        StringBuilder joined = new StringBuilder();
        Matcher artist = MULTI_ARTIST_PATTERN.matcher(orig);
        String separator = "";
        while (artist.find())
        {
            joined.append(separator).append(artist.group(1));
            separator = ",";
        }
        return changeCharset(joined.toString());
    }

    /**
     * Cleans a captured title or album fragment: {@code <b>} and {@code </b>}
     * are replaced by spaces, the text is trimmed, URL-decoded as UTF-8 and
     * finally numeric character references are decoded.
     *
     * @param orig raw fragment captured from the page
     * @return the cleaned text
     * @throws Exception declared for compatibility with existing callers
     */
    public static String procString(String orig) throws Exception
    {
        String withoutBoldTags = orig.replace("<b>", " ").replace("</b>", " ").trim();
        return changeCharset(decodeUtf8(withoutBoldTags));
    }

    /**
     * Fills in file size and download URL from a download page. Only the
     * first match of {@link #DOWN_PATTERN} is used; {@code info} is left
     * untouched when the page does not match.
     *
     * @param downloadPage HTML of the download page
     * @param info         entry to complete
     */
    @Override
    public void getDownloadUrl(String downloadPage, MusicInfo info)
    {
        Matcher matcher = DOWN_PATTERN.matcher(downloadPage);
        if (matcher.find())
        {
            info.setFileSize(procFileSize(matcher.group(2)));
            info.setDownloadUrl(decodeUtf8(matcher.group(3)));
        }
    }

    /**
     * Converts a size text such as {@code "3.5&nbsp;MB"} into a byte count.
     *
     * <p>The number is everything before the first {@code '&'}; when there is
     * no {@code '&'}, the leading run of digits and dots is used instead. A
     * trailing {@code "MB"} multiplies by 1000000, a trailing {@code "KB"} by
     * 1000, anything else is taken as plain bytes.
     *
     * @param sizeString size text, surrounding whitespace is ignored
     * @return the size in bytes, rounded to the nearest integer
     * @throws NumberFormatException if no number can be parsed
     */
    public static int procFileSize(String sizeString)
    {
        String text = sizeString.trim();
        int numberEnd = text.indexOf('&');
        if (numberEnd < 0)
        {
            // No entity separates number and unit: take the leading numeric run.
            numberEnd = 0;
            while (numberEnd < text.length()
                    && (Character.isDigit(text.charAt(numberEnd)) || text.charAt(numberEnd) == '.'))
            {
                numberEnd++;
            }
        }

        double size = Double.parseDouble(text.substring(0, numberEnd));
        if (text.endsWith("MB"))
        {
            size *= 1000000;
        }
        else if (text.endsWith("KB"))
        {
            size *= 1000;
        }
        // Round before narrowing so floating-point error in the product cannot
        // cost a byte; the double-to-int cast still saturates on overflow.
        return (int) Math.rint(size);
    }

    /**
     * Replaces decimal numeric character references ({@code &#NNNN;}) with
     * the characters they denote and leaves all other text, including
     * ordinary semicolons, untouched.
     *
     * <p>A reference without the closing semicolon is decoded as well. A
     * reference whose number is not a valid Unicode code point is kept
     * verbatim.
     *
     * @param raw text that may contain numeric character references
     * @return the decoded text
     * @throws UnsupportedEncodingException never thrown; kept in the
     *         signature so existing callers keep compiling
     */
    public static String changeCharset(String raw) throws UnsupportedEncodingException
    {
        Matcher entity = NUMERIC_ENTITY_PATTERN.matcher(raw);
        StringBuffer decoded = new StringBuffer(raw.length());
        while (entity.find())
        {
            String replacement = entity.group();
            try
            {
                int codePoint = Integer.parseInt(entity.group(1));
                if (Character.isValidCodePoint(codePoint))
                {
                    // toChars yields a surrogate pair for supplementary code points.
                    replacement = new String(Character.toChars(codePoint));
                }
            }
            catch (NumberFormatException ignored)
            {
                // The number does not fit into an int, so it cannot be a code
                // point; the reference stays in the output as written.
            }
            entity.appendReplacement(decoded, Matcher.quoteReplacement(replacement));
        }
        entity.appendTail(decoded);
        return decoded.toString();
    }

    /**
     * URL-decodes {@code encoded} as UTF-8 so the result does not depend on
     * the platform default charset.
     */
    private static String decodeUtf8(String encoded)
    {
        try
        {
            return URLDecoder.decode(encoded, "UTF-8");
        }
        catch (UnsupportedEncodingException e)
        {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }
}