package com.brucezee.jspider.parser.expression;
import com.brucezee.jspider.common.utils.SpiderStrUtils;
import org.apache.commons.lang3.StringUtils;
/**
* 扩展的css query
* Created by zhoubing on 2016/11/29.
*/
public class JsoupExpression {
private String cssQuery;
private boolean isRelative = false;
private String method;
private String parameter;
private int skip = 0;
public static final String METHOD_TEXT = ".text()";
public static final String METHOD_HTML = ".html()";
public static final String METHOD_OUTER_HTML = ".outerHtml()";
public static final String METHOD_OWN_TEXT = ".ownText()";
public static final String METHOD_VAL = ".val()";
private static final String METHOD_ATTR_PREFIX = ".attr(";
private static final String METHOD_ATTR_SUFFIX = ")";
public static final String METHOD_ATTR = METHOD_ATTR_PREFIX+METHOD_ATTR_SUFFIX;
private static final String METHOD_SKIP_PREFIX = ".skip(";
private static final String METHOD_SKIP_SUFFIX = ")";
public static final String METHOD_SKIP = METHOD_SKIP_PREFIX+METHOD_SKIP_SUFFIX;
private static final String RELATIVE_PREFIX = " ";//相对路径的css查询器 前面使用空格作为标志
public JsoupExpression reset() {
//复用对象
this.cssQuery = null;
this.isRelative = false;
this.method = null;
this.parameter = null;
this.skip = 0;
return this;
}
public void parse(String selector) {
reset();
setRelative(selector.startsWith(RELATIVE_PREFIX));
//.text()
//.attr("href")
//.html()
int index = 0;
index = selector.indexOf(METHOD_TEXT);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_TEXT);
return;
}
index = selector.indexOf(METHOD_HTML);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_HTML);
return;
}
index = selector.indexOf(METHOD_ATTR_PREFIX);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_ATTR);
setParameter(getParameterFromSelector(selector));
return;
}
index = selector.indexOf(METHOD_SKIP_PREFIX);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_SKIP);
setSkip(getSkipFromSelector(selector));
return;
}
index = selector.indexOf(METHOD_OUTER_HTML);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_OUTER_HTML);
return;
}
index = selector.indexOf(METHOD_OWN_TEXT);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_OWN_TEXT);
return;
}
index = selector.indexOf(METHOD_VAL);
if (index > 0) {
setCssQuery(selector.substring(0, index));
setMethod(METHOD_VAL);
return;
}
setCssQuery(selector);
}
private String getParameterFromSelector(String text) {
return trimQuotation(SpiderStrUtils.getMiddleText(text, METHOD_ATTR_PREFIX, METHOD_ATTR_SUFFIX));
}
private int getSkipFromSelector(String text) {
text = SpiderStrUtils.getMiddleText(text, METHOD_SKIP_PREFIX, METHOD_SKIP_SUFFIX);
text = SpiderStrUtils.getFirstNumberFromText(text, true);
if (text != null) {
return Integer.parseInt(text);
}
return 0;
}
/**
* 去除字符串两端的空格及双引号
* @param text 文本
* @return 处理后的字符串
*/
private String trimQuotation(String text) {
text = StringUtils.trim(text);
if (text.startsWith("\"")) {
text = text.substring(1);
}
if (text.endsWith("\"")) {
text = text.substring(0, text.length()-1);
}
return StringUtils.trim(text);
}
public boolean isTextMethod() {
return METHOD_TEXT.equals(this.method);
}
public boolean isHtmlMethod() {
return METHOD_HTML.equals(this.method);
}
public boolean isAttrMethod() {
return METHOD_ATTR.equals(this.method);
}
public boolean isOuterHtmlMethod() {
return METHOD_OUTER_HTML.equals(this.method);
}
public boolean isOwnTextMethod() {
return METHOD_OWN_TEXT.equals(this.method);
}
public boolean isValMethod() {
return METHOD_VAL.equals(this.method);
}
public boolean isSkipMethod() {
return METHOD_SKIP.equals(this.method);
}
public String getCssQuery() {
return cssQuery;
}
public void setCssQuery(String cssQuery) {
this.cssQuery = cssQuery;
}
public boolean isRelative() {
return isRelative;
}
public void setRelative(boolean isRelative) {
this.isRelative = isRelative;
}
public String getMethod() {
return method;
}
public void setMethod(String method) {
this.method = method;
}
public String getParameter() {
return parameter;
}
public void setParameter(String parameter) {
this.parameter = parameter;
}
public int getSkip() {
return skip;
}
public void setSkip(int skip) {
this.skip = skip;
}
}