package hk.reality.stock.service.fetcher;
import hk.reality.stock.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
/**
 * Common base for {@link QuoteFetcher} implementations.
 *
 * <p>Each instance owns one pre-configured {@link HttpClient} and one
 * pre-configured {@link HtmlCleaner}, and offers small XPath helpers for
 * pulling the first matching element (or its text) out of a cleaned document.
 */
public abstract class BaseQuoteFetcher implements QuoteFetcher {
    /** Timeout, in seconds, applied to both connection setup and socket reads. */
    private static final int TIMEOUT = 10;

    // Both collaborators are created once in the constructor and never reassigned.
    private final HttpClient client;
    private final HtmlCleaner cleaner;

    /**
     * Creates the fetcher together with its HTTP client and HTML cleaner.
     */
    public BaseQuoteFetcher() {
        this.client = createHttpClient();
        this.cleaner = createHtmlCleaner();
    }

    /**
     * Builds the HTTP client: {@link #TIMEOUT} seconds for connection and
     * socket timeouts, the application user agent, and a
     * {@link ThreadSafeClientConnManager} as connection manager.
     *
     * <p>Only the plain {@code http} scheme (port 80) is registered.
     */
    private static HttpClient createHttpClient() {
        // The HttpConnectionParams setters take milliseconds.
        int timeoutMillis = TIMEOUT * 1000;

        HttpParams params = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(params, timeoutMillis);
        HttpConnectionParams.setSoTimeout(params, timeoutMillis);
        HttpProtocolParams.setUserAgent(params, Constants.USER_AGENT);

        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));

        ClientConnectionManager connectionManager = new ThreadSafeClientConnManager(params, schemeRegistry);
        return new DefaultHttpClient(connectionManager, params);
    }

    /**
     * Builds the HTML cleaner and applies the cleaning options used by the
     * quote fetchers.
     */
    private static HtmlCleaner createHtmlCleaner() {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        CleanerProperties prop = htmlCleaner.getProperties();

        // Parts of the page that are dropped from the cleaned output.
        prop.setOmitDoctypeDeclaration(true);
        prop.setOmitUnknownTags(true);
        prop.setOmitComments(true);
        prop.setIgnoreQuestAndExclam(true);
        prop.setOmitDeprecatedTags(true);
        prop.setOmitXmlDeclaration(true);

        // Escaping, unicode handling and envelope/CDATA options.
        prop.setAdvancedXmlEscape(false);
        prop.setRecognizeUnicodeChars(false);
        prop.setOmitHtmlEnvelope(false);
        prop.setUseCdataForScriptAndStyle(true);
        return htmlCleaner;
    }

    /**
     * Evaluates the XPath against the document and returns the first result
     * that is a {@link TagNode}, or {@code null} when none of the results is.
     */
    private static TagNode findFirstTagNode(TagNode document, String xpath) throws XPatherException {
        for (Object match : document.evaluateXPath(xpath)) {
            // XPath results are plain objects; skip anything that is not a TagNode.
            if (match instanceof TagNode) {
                return (TagNode) match;
            }
        }
        return null;
    }

    /**
     * Searches the document with the given XPath and returns the first
     * matched element.
     *
     * @param document the node the XPath expression is evaluated against
     * @param xpath the XPath expression to evaluate
     * @return the first result that is a {@link TagNode}, or {@code null} if
     *         no result is a {@link TagNode}
     * @throws XPatherException if evaluating the XPath expression fails
     */
    public TagNode getFirstMatchedElement(TagNode document, String xpath) throws XPatherException {
        return findFirstTagNode(document, xpath);
    }

    /**
     * Searches the document with the given XPath and returns the trimmed
     * text content of the first matched element.
     *
     * @param document the node the XPath expression is evaluated against
     * @param xpath the XPath expression to evaluate
     * @return the trimmed text of the first result that is a {@link TagNode},
     *         or an empty string if no result is a {@link TagNode}
     * @throws XPatherException if evaluating the XPath expression fails
     */
    public String getFirstMatchedElementContent(TagNode document, String xpath) throws XPatherException {
        TagNode node = findFirstTagNode(document, xpath);
        if (node == null) {
            return "";
        }
        return StringUtils.trim(node.getText().toString());
    }

    /**
     * @return the HTTP client created by the constructor
     */
    public HttpClient getClient() {
        return client;
    }

    /**
     * @return the HTML cleaner created by the constructor
     */
    public HtmlCleaner getCleaner() {
        return cleaner;
    }
}