package com.xiaozhi.blog.service.page;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.xiaozhi.blog.utils.VideoUtil;
import com.xiaozhi.blog.vo.Page;
/**
 * Static helpers for extracting preview information (title and image URLs)
 * from a remote HTML page and for measuring the size of a remote resource.
 */
public class HTMLParser {

    private static final Log logger = LogFactory.getLog(HTMLParser.class);

    /** Chunk size used when streaming a remote resource to count its bytes. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the page at {@code url} and collects its title and image URLs.
     *
     * <p>If {@code url} does not start with {@code http://} or {@code https://}
     * (compared case-insensitively), {@code http://} is prepended before fetching.
     * Only {@code <img>} elements whose {@code src} contains {@code "http://"}
     * are kept.
     *
     * @param url address of the page, with or without a scheme; may be {@code null}
     * @return a {@link Page} holding the title, the (scheme-qualified) page URL and
     *         the collected image URLs; {@code null} if {@code url} is {@code null},
     *         the page could not be fetched or parsed, or the page contains no
     *         {@code <img>} elements at all
     */
    public static Page getHtmlInfo(String url) {
        if (url == null) {
            return null;
        }
        try {
            // Keep the page URL in its own variable: the image loop below must not
            // overwrite the value that is later stored on the Page.
            String pageUrl = hasHttpScheme(url) ? url : "http://" + url;

            Document doc = VideoUtil.getURLContent(pageUrl);
            if (doc == null) {
                return null;
            }

            Elements images = doc.getElementsByTag("img");
            if (images == null || images.isEmpty()) {
                // Pages without any <img> tag yield no result, as before.
                return null;
            }

            List<String> imgs = new ArrayList<String>(images.size());
            for (int i = 0; i < images.size(); i++) {
                String src = images.get(i).attr("src");
                // NOTE(review): https-only and relative image sources are still
                // skipped here, matching the existing filter.
                if (src != null && src.indexOf("http://") != -1) {
                    imgs.add(src);
                }
            }

            Page page = new Page();
            page.setTitle(doc.title());
            page.setUrl(pageUrl);
            page.setImgs(imgs);
            return page;
        } catch (Exception e) {
            logger.error("Failed to extract HTML info from " + url, e);
        }
        return null;
    }

    /**
     * Downloads the resource at {@code url} and returns its size in bytes.
     *
     * @param url absolute URL of the resource
     * @return the number of bytes read, capped at {@link Integer#MAX_VALUE};
     *         {@code 0} if the URL is malformed or the resource cannot be read
     */
    public static int getSize(String url) {
        BufferedInputStream in = null;
        try {
            URL resUrl = new URL(url);
            in = new BufferedInputStream(resUrl.openStream());

            byte[] buffer = new byte[BUFFER_SIZE];
            long total = 0L; // long so that very large resources cannot overflow
            int read;
            while ((read = in.read(buffer)) != -1) {
                total += read;
            }
            logger.debug("------------------------------>" + total);
            return total > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) total;
        } catch (MalformedURLException e) {
            logger.error("Malformed resource URL: " + url, e);
        } catch (IOException e) {
            logger.error("Failed to read resource: " + url, e);
        } finally {
            // Always release the connection, including when reading fails midway.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    logger.debug("Failed to close stream for " + url, e);
                }
            }
        }
        return 0;
    }

    /** Returns whether {@code url} already starts with an http or https scheme. */
    private static boolean hasHttpScheme(String url) {
        return url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
    }
}