import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; import java.util.Date; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import javax.sql.PooledConnection; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; import org.lobobrowser.html.UserAgentContext; import org.lobobrowser.html.parser.HtmlParser; import org.lobobrowser.html.test.SimpleUserAgentContext; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; public class CellseaCategory { public final static String HOST_PREFIX = "http://www.cellsea.com/ringtone/lists/"; public final static String HOST_POSTFIX = "/recent/7/"; public final static int ITEM_PER_PAGE = 24; private static int timeThreshold; private static int fileIndex; private static String itemXpath = "//div[@id=\"basicpanel\"][1]//div"; private static int numThreadAlive; private static ExecutorService pool; private String mCategory; private int page; private String mURL; // for a category public CellseaCategory(String category) { mCategory = category; page = 0; mURL = HOST_PREFIX + mCategory + HOST_POSTFIX; } public void parser() { Reader reader = null; try { UserAgentContext uacontext = new SimpleUserAgentContext(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); String url = mURL+page; URL httpLink = new URL(url); InputStream in = httpLink.openConnection().getInputStream(); reader = new InputStreamReader(in); Document document = builder.newDocument(); // Here is where we use Cobra's HTML parser. HtmlParser parser = new HtmlParser(uacontext, document); parser.parse(reader); // Now we use XPath to locate "a" elements that are // descendents of any "html" element. XPath xpath = XPathFactory.newInstance().newXPath(); int i; NodeList nodeList; nodeList = (NodeList) xpath.evaluate(itemXpath, document, XPathConstants.NODESET); for(i = 0; i < nodeList.getLength(); i++) { // for each music String info=null,date=null,size=null,pageURL=null; Element item = (Element) nodeList.item(i); NodeList propList = item.getElementsByTagName("div"); for(int j=0; j<propList.getLength(); j++) { // for music's each property Element elem = (Element)propList.item(j); if(j == 1) { NodeList urlList = (NodeList)elem.getElementsByTagName("a"); pageURL = ((Element)urlList.item(0)).getAttribute("href"); //System.out.println(url); } String val = elem.getTextContent().trim(); switch(j) { case 1: info = val; break; case 2: date = val; break; case 3: size = val; break; } } if(pageURL!=null && info!=null && date!=null && size!=null) { // judge date here if(!newEnough(date)) return ; CellseaThread thread = CellseaThread.createCellseaThread( mCategory, fileIndex++, pageURL, info, size); if(thread != null) { startThread(thread); } } } // render to next page if(nodeList.getLength() == ITEM_PER_PAGE) { page += ITEM_PER_PAGE; parser(); } }catch (Exception e) { System.out.println("xpath parse err"); e.printStackTrace(); }finally { if(reader != null) { try { reader.close(); } catch (Exception e2) { } } } } private void startThread(CellseaThread thread) { numThreadAlive ++; pool.execute(thread); } public static void threadFinish() { numThreadAlive --; } // Submitter:?Misterz Belieberz // posted:?14 days ago private boolean newEnough(String str) { int idx1 = str.indexOf(':'); int idx2 = str.indexOf(':', idx1+1); if(idx1==-1 || idx2==-1) { System.out.println("getDate() err"); return false; } String[] split = str.substring(idx2+2).split(" "); String unit = split[1]; int num = Integer.parseInt(split[0]); int days = 1000000; if(unit.equals("hours") || unit.equals("hour") || unit.equals("minute") || unit.equals("minutes")) { days = 0; }else if(unit.equals("days") || unit.equals("day")) { days = num; }else if(unit.equals("months") || unit.equals("month")) { days = 30*num; }else { System.out.println("time unit:"+unit); } //System.out.println(days); return days < timeThreshold; } //public final static String[] CATEGORY = new String[]{"Comedy"}; public final static String[] CATEGORY = new String[]{"Acoustic", "Alternative", "Anime", "Blues", "Classical", "Comedy", "Country", "Dance", "Electronic", "Funk", "Game", "Hard Rock", "Hip-Hop", "Humour", "Indie", "Instrumental", "Jazz", "Latin", "Musical", "Noise", "Oldies", "Opera", "Pop", "R_B", "Rap", "Rock", "Soundtrack", "Symphony", "Techno", "Trailer", "Vocal"}; public static void sync(Date lastSyncDate) { timeThreshold = (int)((new Date().getTime() - lastSyncDate.getTime())/(24*60*60*1000)); System.out.println("time threshold:"+timeThreshold); fileIndex = 1; numThreadAlive = 0; pool = Executors.newFixedThreadPool(3); System.out.println("\n"); for(int i=0; i<CATEGORY.length; i++) { System.out.println("Category:"+CATEGORY[i]); new CellseaCategory(CATEGORY[i]).parser(); } pool.shutdown(); while(numThreadAlive > 0) { try { Thread.sleep(10000); } catch (Exception e) { } } } }