package com.geccocrawler.gecco.spider.render.html; import java.lang.reflect.Field; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jsoup.nodes.Element; import org.mozilla.javascript.Context; import org.mozilla.javascript.NativeArray; import org.mozilla.javascript.NativeJSON; import org.mozilla.javascript.NativeObject; import org.mozilla.javascript.ScriptableObject; import org.reflections.ReflectionUtils; import com.alibaba.fastjson.JSON; import com.geccocrawler.gecco.annotation.JSVar; import com.geccocrawler.gecco.request.HttpRequest; import com.geccocrawler.gecco.response.HttpResponse; import com.geccocrawler.gecco.spider.SpiderBean; import com.geccocrawler.gecco.spider.conversion.Conversion; import com.geccocrawler.gecco.spider.render.FieldRender; import net.sf.cglib.beans.BeanMap; /** * 解析页面中的javascript变量 * * @author huchengyi * */ public class JSVarFieldRender implements FieldRender { private static Log log = LogFactory.getLog(JSVarFieldRender.class); @Override @SuppressWarnings({ "unchecked" }) public void render(HttpRequest request, HttpResponse response, BeanMap beanMap, SpiderBean bean) { Context cx = Context.enter(); ScriptableObject scope = cx.initSafeStandardObjects(); String windowScript = "var window = {};var document = {};"; cx.evaluateString(scope, windowScript, "window", 1, null); HtmlParser parser = new HtmlParser(request.getUrl(), response.getContent()); for (Element ele : parser.$("script")) { String sc = ele.html(); if (StringUtils.isNotEmpty(sc)) { try { cx.evaluateString(scope, sc, "", 1, null); } catch (Exception ex) { // ex.printStackTrace(); } } } Map<String, Object> fieldMap = new HashMap<String, Object>(); Set<Field> jsVarFields = ReflectionUtils.getAllFields(bean.getClass(), ReflectionUtils.withAnnotation(JSVar.class)); for (Field jsVarField : jsVarFields) { Object value = injectJsVarField(request, beanMap, jsVarField, cx, scope); if(value != null) { fieldMap.put(jsVarField.getName(), value); } } beanMap.putAll(fieldMap); Context.exit(); } @SuppressWarnings({ "rawtypes" }) private Object injectJsVarField(HttpRequest request, BeanMap beanMap, Field field, Context cx, ScriptableObject scope) { Class clazz = field.getType(); JSVar jsVar = field.getAnnotation(JSVar.class); String var = jsVar.var(); Object jsObj = scope.get(var, scope); if (jsObj instanceof NativeObject || jsObj instanceof NativeArray) { String jsonPath = jsVar.jsonpath(); // 将javascript变量格式化为json对象 Object jsonObj = NativeJSON.stringify(cx, scope, jsObj, null, null); // 使用fastjson将json字符串格式化为JSONObject Object json = JSON.parse(jsonObj.toString()); // 解析jsonpath Object src = com.alibaba.fastjson.JSONPath.eval(json, jsonPath); // 如果解析出来的是字符串,尝试转换为json对象 try { if (src instanceof String) { src = JSON.parse(src.toString()); } } catch (Exception ex) { } // 将json对象转换为javabean属性 try { Object value = Conversion.getValue(clazz, src); return value; } catch (Exception e) { log.error("field [" + field.getName() + "] conversion error, value=" + src); } } else if (jsObj instanceof Boolean || jsObj instanceof Number || jsObj instanceof String) { try { Object value = Conversion.getValue(clazz, jsObj); return value; } catch (Exception e) { log.error("field [" + field.getName() + "] conversion error, value=" + jsObj); } } return null; } public static void main(String[] args) { Object json = JSON.parse("{ads:[{id:1},{id:2}],test:'test111'}"); Object src = com.alibaba.fastjson.JSONPath.eval(json, "$.ads"); if (src instanceof String) { src = JSON.parse(src.toString()); } System.out.println(src); Object src2 = com.alibaba.fastjson.JSONPath.eval(json, "$.test"); if (src2 instanceof String) { src2 = JSON.parse(src2.toString()); } System.out.println(src2); } }