package com.cinderella.musicsearch; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; import android.content.Context; import android.text.TextUtils; import android.util.Log; public class SogouMusicSearcher { private static final String URL_SEARCH = "http://mp3.sogou.com/music.so?pf=mp3&query="; private static final String URL_SEARCH_PROXY = "http://chaowebs.appspot.com/msearch/music.so?pf=mp3&query="; private static final String SOGOU_MP3 = "http://mp3.sogou.com"; private static final Pattern PATTERN_ROW = Pattern.compile("<tr(.*?)</tr>", Pattern.DOTALL); private static final Pattern PATTERN = Pattern.compile( "<td.*?\\btitle=\"([^\"]*)\".*?" + // 1 "<td.*?\\bsinger=\"([^\"]*)\".*?" + // 2 "<td.*?\\btitle=\"([^\"]*)\".*?" + // 3 "<td.*?</td>.*?" + // Ignore "<td.*?\'(/down.so.*?)\'.*?" + // 4 // TODO(zyu): In some cases, lyrics are empty. Temporily ignore lyrics. // "<td.*?href=\"([^\"]*)\".*?" + // 5 "<td>(.*?)</td>.*?" + // 5 "<td.*?</td>.*?" + // Ignore "<td.*?>([^<]*)<.*?" + // 6 "<td.*?>([^<]*)<" + // 7 "" , Pattern.DOTALL); private static final Pattern PATTERN_DOWNLOAD_URL = Pattern.compile("href=\"([^\"]*)\""); private static final String DOWNLOAD_MARKER = "下载歌曲"; private String mSearchUrl; private String mProxyUrl; private int mPage; // Next page to fetch. private int mCookie_id = 0; private static final String CODING = "gb2312"; public SogouMusicSearcher() { } public void setQuery(String query) { mPage = 1; try { mSearchUrl = URL_SEARCH + URLEncoder.encode(query, "gb2312"); mProxyUrl = URL_SEARCH_PROXY + URLEncoder.encode(query, "gb2312"); } catch (UnsupportedEncodingException e) { mSearchUrl = URL_SEARCH + URLEncoder.encode(query); mProxyUrl = URL_SEARCH_PROXY + URLEncoder.encode(query); } } private String getNextUrl() { return mPage == 1 ? mSearchUrl : mSearchUrl + "&page=" + mPage; } private ArrayList<MusicInfo> getMusicInfoListFromHtml(String html) throws UnsupportedEncodingException { Utils.D("+++++++++++++++"); Utils.D(html); Utils.D("+++++++++++++++"); ArrayList<MusicInfo> musicList = new ArrayList<MusicInfo>(); Matcher matcherRow = PATTERN_ROW.matcher(html); while (matcherRow.find()) { Matcher m = PATTERN.matcher(matcherRow.group(1)); while (m.find()) { MusicInfo info = new MusicInfo(); info.setTitle(m.group(1).trim()); info.setArtist(URLDecoder.decode(m.group(2), "gb2312").trim()); info.setAlbum(m.group(3).trim()); info.setUrl(SOGOU_MP3 + m.group(4).trim()); // info.setLyricUrl(SOGOU_MP3 + m.group(5).trim()); String displayFileSize = m.group(6).trim(); if (displayFileSize.equals("未知")) displayFileSize = "Unknown size"; info.setDisplayFileSize(displayFileSize); info.setType(m.group(7).trim()); musicList.add(info); } } Utils.D("Exit getMusicInfoListFromHtml"); return musicList; } public int getCurPage() { return mPage-1; } // Returns null when something wrong happens. public ArrayList<MusicInfo> getNextResultList() { if (mPage > 0) { try { Thread.sleep(2000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { String html = NetUtils.fetchHtmlPage(mCookie_id, getNextUrl(), "gb2312"); if (TextUtils.isEmpty(html)) return null; ArrayList<MusicInfo> musicList = getMusicInfoListFromHtml(html); if (musicList.size() > 0) { mPage++; return musicList; } /* else if (!sUseProxy && mPage == 1) { // Give it one more chance. sUseProxy = true; Log.i(Utils.TAG, "Switching to proxy mode"); html = NetUtils.fetchHtmlPage(getNextUrl(), "gb2312"); musicList = getMusicInfoListFromHtml(html); if (musicList.size() > 0) { mPage++; } } */ return musicList; } catch (Exception e) { e.printStackTrace(); return null; } } // Returns null when something wrong happens. public ArrayList<MusicInfo> getPrevResultList() { if (mPage == 0) { return null; } if (mPage > 0) { try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } } try { String html = NetUtils.fetchHtmlPage(mCookie_id, getNextUrl(), CODING); if (TextUtils.isEmpty(html)) return null; ArrayList<MusicInfo> musicList = getMusicInfoListFromHtml(html); if (musicList.size() > 0) { mPage--; return musicList; } return musicList; } catch (Exception e) { e.printStackTrace(); return null; } } public void setMusicDownloadUrl(MusicInfo info) { try { String html = NetUtils.fetchHtmlPage(mCookie_id, info.getUrl(), "gb2312"); info.setDownloadUrl(getSogouLinkList(html)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static ArrayList<String> getSogouLinkList(String html) { ArrayList<String> linkList = new ArrayList<String>(); final Pattern LinkPattern = Pattern.compile("(http://.*?.mp3[^\"]*)\""); Matcher matcher = LinkPattern.matcher(html); while(matcher.find()) { String candidate = matcher.group(1); if (candidate.contains("</a>")) { continue; } if (isInList(candidate, linkList)) { continue; } linkList.add(candidate); } return linkList; } public static boolean isInList(String link, ArrayList<String> list) { for (String item : list) { if (link.equalsIgnoreCase(item)) { return true; } } return false; } }