import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FetchDataFromMabilo extends Thread{
private Date date;
private Date now;
private int numThreadAlive;
public FetchDataFromMabilo(Date date) {
this.date = date; // time of last update
this.now = date; // time of today
numThreadAlive = 0;
}
public Date getThisDate() {
return now;
}
private static final Pattern TOTAL_PATTERN = Pattern.compile(
"row2.*?src=\"(.*?)\".*?" // image
+ "href=\"(.*?)\">" // url
+ "(.*?)</a>.*?" // title
+ "Artist.*?>(.*?)</a>.*?" // artist
+ "Category.*?>(.*?)</a>.*?" // category
+ "style=\"width:(.*?)%;.*?" // rating
+ "<span>(.*?)\\sdownloads.*?" // downloads
+ "Added:\\s(.*?)</span>" // date
, Pattern.DOTALL);
private static final Pattern EACH_PATTERN = Pattern.compile(
"Size:</span>(.*?)<br.*?" // ring size
+ "det2.*?<a\\shref=\"(.*?)&title=" // ringtone url
, Pattern.DOTALL);
private static final String PROCEED = "http://www.mabilo.com/search/All-";
private static final String EXCEED = "-da.htm";
public static final String Ring_Download_Prefix = "http://music.mabilo.com/dl";
public void run() {
int page = 1;
String url = "";
boolean running = true;
ExecutorService pool = Executors.newFixedThreadPool(3);
while(running) {
try {
url = PROCEED+page+EXCEED;
Matcher all = TOTAL_PATTERN.matcher(Consts.fetchHtmlPage(url));
while(all.find()) {
String time = all.group(8);
int split = time.indexOf(',');
Date temp = Consts.SDF.parse(time.substring(0,split-2)+time.substring(split+1));
//System.out.println(temp.toString());
if(temp.after(date)) {
MusicInfo music = new MusicInfo();
music.setImageUrl(all.group(1));
music.setUrl(Consts.MABILO_BASE+all.group(2));
music.setTitle(all.group(3));
music.setArtist(all.group(4).trim());
music.setAlbum(all.group(5).replace("&", "n").replace("/", "_"));
music.setmScore(Integer.parseInt(all.group(6)));
music.setmCounts(Consts.String2Int(all.group(7)));
pool.execute(new ItemThread(music));
if(temp.after(now))
now = temp;
} else {
running = false;
break;
}
}
if(running) page++;
} catch (Exception e) {
page ++;
System.out.println(url+" get err");
e.printStackTrace();
}
}
pool.shutdown();
while(numThreadAlive != 0) {
try {
sleep(60000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
// get every music info, including: ring, image, download_counts, title...
class ItemThread implements Runnable {
private MusicInfo music;
public ItemThread(MusicInfo msc) {
this.music = msc;
}
public void run() {
numThreadAlive ++;
music.setImageName(getFilenameFromURL(music.getImageUrl()));
if(Consts.downloadTryMulTimes(music.getImageUrl(), Consts.NEW_DOWNLOAD_DIR+music.getImageName())) {
// if image downloaded successfully
try {
Matcher matcher = EACH_PATTERN.matcher(Consts.fetchHtmlPage(music.getUrl()));
while(matcher.find()) {
music.setSize(matcher.group(1).trim());
music.setDownloadUrl(Ring_Download_Prefix+matcher.group(2).substring(matcher.group(2).indexOf(".php")));
music.setRingName(matcher.group(2).substring(matcher.group(2).indexOf("file=")+5));
// download ring here
if(Consts.downloadTryMulTimes(music.getDownloadUrl(), Consts.NEW_DOWNLOAD_DIR+music.getRingName())) {
System.out.println(music.getTitle() + " download success");
// convert from mp3 to m4r
if(Mp3ToM4R.convert(music)) {
UploadAmazonS3 uploadAmazonS3 = new UploadAmazonS3(music);
uploadAmazonS3.run();
}else {
System.out.println(music.getRingName()+" convert err");
}
}
break;
}
} catch (IOException e) {
e.printStackTrace();
}
}
numThreadAlive --;
}
}
public static String getFilenameFromURL(String url) {
String[] que = url.split("/");
return que[que.length-1];
}
}