package com.hao.parser;
import com.hao.model.StopSale;
import com.google.common.collect.Lists;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.hao.common.Commons.*;
/**
* Created by user on 2016/2/18.
*/
public class ParserStopSalePage {
    public static final Logger logger = LoggerFactory.getLogger(ParserStopSalePage.class);

    /** Matches the first run of digits in a string; used to derive the record id from the page URL. */
    public static final Pattern pattern = Pattern.compile("(\\d+)");

    /** Prefix prepended to the href of each configuration link. */
    private static final String BASE_URL = "http://www.autohome.com.cn/";

    /** File that receives URLs whose processing raised an IllegalArgumentException. */
    private static final String ERROR_URL_FILE = "error_url.txt";

    /**
     * Parses a stop-sale page into a list of {@link StopSale} records.
     *
     * <p>One record is produced per {@code .car_price} element. Each record carries the
     * id (first run of digits in {@code url}), the car name (text of
     * {@code .subnav-title-name > a}), the year (first {@code span}), the advised and used
     * prices (first and second {@code span > strong}) and the configuration link taken from
     * the second anchor of the {@code .models_nav} element at the same position.
     *
     * <p>If no configuration link exists for a given position (fewer {@code .models_nav}
     * elements than {@code .car_price} elements, or fewer than two anchors), the record's
     * link is set to {@code null} and a warning is logged instead of aborting the page.
     *
     * @param url  link of the stop-sale page
     * @param path documented as the location where configuration links are saved;
     *             NOTE(review): not used by this method, retained for caller compatibility
     * @return the parsed records, or {@code null} when an {@link IllegalArgumentException}
     *         was raised while loading or parsing the page (the URL is then written to
     *         {@code error_url.txt})
     * @throws IOException               propagated from the page-loading or file-writing helpers
     * @throws IndexOutOfBoundsException if a {@code .car_price} element lacks the expected
     *                                   {@code span} / {@code span > strong} children
     */
    public static List<StopSale> parseStopSaleData(String url, String path) throws IOException {
        try {
            Document document = getDocument(url);

            // Page-level values: identical for every record, so compute them once.
            String id = extractId(url);
            String carName = document.select(".subnav-title-name > a").text();

            Elements configElems = document.select(".models_nav");
            Elements carElems = document.select(".car_price");

            List<StopSale> lists = Lists.newArrayList();
            for (int i = 0; i < carElems.size(); i++) {
                Element carElem = carElems.get(i);
                Elements prices = carElem.select("span > strong");

                StopSale stopSale = new StopSale();
                stopSale.setYear(carElem.select("span").get(0).text());
                stopSale.setAdvicePrice(prices.get(0).text());
                stopSale.setCarName(carName);
                stopSale.setLink(resolveConfigLink(configElems, i, url));
                stopSale.setUsedPrice(prices.get(1).text());
                stopSale.setId(id);
                lists.add(stopSale);
            }
            return lists;
        } catch (IllegalArgumentException e) {
            // Record the offending URL and signal the failure with null; every other
            // exception type propagates to the caller untouched.
            logger.warn("Failed to parse stop-sale page {}", url, e);
            writeStringtoFile(ERROR_URL_FILE, url + "\n", true);
            return null;
        }
    }

    /** Returns the first run of digits found in {@code url}, or {@code null} if there is none. */
    private static String extractId(String url) {
        if (url == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(url);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Builds the configuration link for the record at {@code index}, or returns {@code null}
     * (after logging a warning) when the matching element or its second anchor is missing.
     */
    private static String resolveConfigLink(Elements configElems, int index, String url) {
        if (index < configElems.size()) {
            Elements anchors = configElems.get(index).select("a");
            if (anchors.size() > 1) {
                return BASE_URL + anchors.get(1).attr("href");
            }
        }
        logger.warn("No config link found for entry {} on stop-sale page {}", index, url);
        return null;
    }
}