package com.deepnighttwo.aircondition.acsum.util;
import static com.google.appengine.api.taskqueue.TaskOptions.Builder.withUrl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.deepnighttwo.aircondition.acsum.dao.ACSumDAOManager;
import com.deepnighttwo.aircondition.acsum.dao.AirConditionSum;
import com.deepnighttwo.aircondition.exception.HTMLParseException;
import com.deepnighttwo.aircondition.util.CalendarUtil;
import com.deepnighttwo.aircondition.util.FetchURLUtil;
import com.google.appengine.api.taskqueue.Queue;
import com.google.appengine.api.taskqueue.QueueFactory;
import com.google.appengine.api.taskqueue.TaskOptions.Method;
/**
 * Static helpers for retrieving air condition summary data: one method
 * enqueues a retrieval task for a time window, the other fetches the source
 * HTML page for a time window, parses its result table and hands the parsed
 * records to {@link ACSumDAOManager}.
 */
public class ACSumHTMLContentUtil {

    /** Name of the request parameter that carries the start date of the window. */
    public static final String PARAM_START_DATE = "startDate";

    /** Name of the request parameter that carries the end date of the window. */
    public static final String PARAM_END_DATE = "endDate";

    /**
     * Adds a GET task to the "acsuminitializerworker" queue that targets
     * "/dataretrieve/acsumRetrieveData" with the given window passed as the
     * {@link #PARAM_START_DATE} and {@link #PARAM_END_DATE} query parameters.
     *
     * @param start
     *            first day of the time window
     * @param end
     *            last day of the time window
     */
    public static void pushACSumTaskToQueue(Calendar start, Calendar end) {
        String startStr = CalendarUtil.getDateStringFromCalendar(start);
        String endStr = CalendarUtil.getDateStringFromCalendar(end);
        Queue initializerWorkerQueue = QueueFactory
                .getQueue("acsuminitializerworker");
        String url = "/dataretrieve/acsumRetrieveData?" + PARAM_START_DATE
                + "=" + startStr + "&" + PARAM_END_DATE + "=" + endStr;
        initializerWorkerQueue.add(withUrl(url).method(Method.GET));
    }

    /**
     * Retrieves the HTML content for the given time window, parses the result
     * table into {@link AirConditionSum} records and passes them to
     * {@link ACSumDAOManager#checkAndAddCondition}.
     * <p>
     * The first table row is treated as the header and must contain exactly
     * four cells. Every following row is converted through
     * {@code ACSumFactory.getAirConditionSum}; rows that the factory rejects,
     * or that contain fewer than four cells, are skipped and listed in the
     * returned error report. Successfully parsed rows are stored even when
     * other rows are invalid.
     *
     * @param start
     *            first day of the time window (sent as "Fdate")
     * @param end
     *            last day of the time window (sent as "Tdate")
     * @return {@code null} when every row was parsed and the number of records
     *         equals the number of days in the window; otherwise a description
     *         of the invalid rows and/or the record count mismatch
     * @throws IOException
     *             Socket error most likely, can be recovered by retry;
     * @throws HTMLParseException
     *             target html content format is changed, need to modify code.
     */
    public static String getACSumData(Calendar start, Calendar end)
            throws IOException, HTMLParseException {
        String startStr = CalendarUtil.getDateStringFromCalendar(start);
        String endStr = CalendarUtil.getDateStringFromCalendar(end);

        Map<String, String> params = new HashMap<String, String>();
        params.put("Tdate", endStr);
        params.put("Fdate", startStr);
        // Third argument is presumably the charset used for the exchange -
        // confirm against FetchURLUtil.
        String htmlContent = FetchURLUtil.getContentUsingPost(
                "http://www.envir.gov.cn/airnews/index.asp", params, "GBK");

        Document doc = Jsoup.parse(htmlContent);
        // NOTE(review): HTML5-compliant jsoup parsers are known to insert an
        // implicit <tbody> between <table> and <tr>; confirm this direct-child
        // selector still matches with the jsoup version in use.
        Elements eles = doc.select("table[bordercolor] > tr");
        if (eles.size() == 0) {
            throw new HTMLParseException(
                    "No row returned. May caused by socket error or target html format is changed: "
                            + startStr + " to " + endStr);
        }

        int colCount = 4;
        checkColumnCount(eles, colCount);
        // Drop the header row so that only data rows remain.
        eles.remove(0);

        StringBuilder error = new StringBuilder("Invalidate row content:");
        boolean hasError = false;
        List<AirConditionSum> acs = new ArrayList<AirConditionSum>();
        String[] r = new String[colCount];
        int rawDataRowCount = eles.size();
        for (Element ele : eles) {
            Elements row = ele.select("td");
            if (row.size() < colCount) {
                // Only the header row is validated by checkColumnCount; a
                // short data row would otherwise fail with an unchecked
                // IndexOutOfBoundsException. Report it like any other
                // invalid row instead.
                List<String> partial = new ArrayList<String>();
                for (Element cell : row) {
                    partial.add(cell.ownText());
                }
                error.append("\"").append(partial).append("\", ");
                hasError = true;
                continue;
            }
            for (int i = 0; i < colCount; i++) {
                r[i] = row.get(i).ownText();
            }
            AirConditionSum ac = ACSumFactory.getAirConditionSum(r[0], r[1],
                    r[2], r[3]);
            if (ac == null) {
                error.append("\"").append(Arrays.toString(r)).append("\", ");
                hasError = true;
                continue;
            }
            acs.add(ac);
        }
        ACSumDAOManager.checkAndAddCondition(acs);

        // One record is expected for each day of the window, both ends included.
        int dayCount = CalendarUtil.dayDiff(end, start) + 1;
        int recordSize = acs.size();
        if (dayCount != recordSize) {
            hasError = true;
            error.append("[").append(startStr).append("] to [").append(endStr);
            if (rawDataRowCount == recordSize) {
                // Every source row was parsed, so the source itself is short.
                error.append("] Record Missing from source. Expected: ")
                        .append(dayCount).append(", Actual: ")
                        .append(rawDataRowCount);
            } else {
                // Some source rows were rejected while parsing.
                error.append(
                        "] Record count doesn't match caused by parser. Expected: ")
                        .append(dayCount).append(", Actual: ")
                        .append(recordSize);
            }
        }
        return hasError ? error.toString() : null;
    }

    /**
     * Verifies that the first row of {@code eles} (the header row) contains
     * exactly {@code colCount} cells.
     *
     * @param eles
     *            table rows; must contain at least one row
     * @param colCount
     *            expected number of cells in the header row
     * @throws HTMLParseException
     *             if the header row has a different number of cells; the
     *             message lists the actual count and the text of each cell
     */
    private static void checkColumnCount(Elements eles, int colCount)
            throws HTMLParseException {
        Elements title = eles.get(0).select("td");
        if (title.size() != colCount) {
            StringBuilder ret = new StringBuilder("Invalidate column count:")
                    .append(title.size()).append(" ");
            for (Element cell : title) {
                ret.append(cell.ownText()).append(" ");
            }
            throw new HTMLParseException(ret.toString());
        }
    }
}