package com.geccocrawler.gecco.spider.render;

import java.lang.reflect.Field;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.reflections.ReflectionUtils;

import com.geccocrawler.gecco.annotation.FieldRenderName;
import com.geccocrawler.gecco.annotation.Href;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.response.HttpResponse;
import com.geccocrawler.gecco.scheduler.DeriveSchedulerContext;
import com.geccocrawler.gecco.spider.SpiderBean;
import com.geccocrawler.gecco.utils.ReflectUtils;

import net.sf.cglib.beans.BeanMap;

/**
 * Abstract render. It takes care of injecting the basic properties and the
 * custom (user supplied) properties of a {@link SpiderBean}; the injection that
 * is specific to html, json or xml is left to the implementing classes through
 * {@link #fieldRender(HttpRequest, HttpResponse, BeanMap, SpiderBean)}.
 *
 * @author huchengyi
 *
 */
public abstract class AbstractRender implements Render {

	private static final Log log = LogFactory.getLog(AbstractRender.class);

	/**
	 * Injection of the request itself.
	 */
	private RequestFieldRender requestFieldRender;

	/**
	 * Injection of the request parameters.
	 */
	private RequestParameterFieldRender requestParameterFieldRender;

	/**
	 * Custom injection. Not initialised by the constructor; it stays
	 * {@code null} until
	 * {@link #setCustomFieldRenderFactory(CustomFieldRenderFactory)} is called.
	 */
	private CustomFieldRenderFactory customFieldRenderFactory;

	/**
	 * Creates the render with its request and request-parameter field renders.
	 */
	public AbstractRender() {
		this.requestFieldRender = new RequestFieldRender();
		this.requestParameterFieldRender = new RequestParameterFieldRender();
	}

	/**
	 * Instantiates {@code clazz} and populates the new bean. The steps run in
	 * this order: request injection, request-parameter injection, the
	 * subclass-specific {@link #fieldRender}, custom renders for fields
	 * annotated with {@link FieldRenderName}, and finally
	 * {@link #requests(HttpRequest, SpiderBean)}.
	 *
	 * @param clazz    the bean type to instantiate through its no-arg constructor
	 * @param request  the request being rendered
	 * @param response the response being rendered
	 * @return the populated bean, or {@code null} when any step throws (the
	 *         exception is logged, not propagated)
	 */
	@Override
	public SpiderBean inject(Class<? extends SpiderBean> clazz, HttpRequest request, HttpResponse response) {
		try {
			SpiderBean bean = clazz.newInstance();
			BeanMap beanMap = BeanMap.create(bean);
			requestFieldRender.render(request, response, beanMap, bean);
			requestParameterFieldRender.render(request, response, beanMap, bean);
			fieldRender(request, response, beanMap, bean);
			renderCustomFields(request, response, beanMap, bean);
			requests(request, bean);
			return bean;
		} catch (Exception ex) {
			// Failures are reported through the log; callers receive null.
			log.error("instance SpiderBean error", ex);
			return null;
		}
	}

	/**
	 * Applies the registered custom render to every field of the bean that is
	 * annotated with {@link FieldRenderName}. Fields whose render name is not
	 * known to the factory are skipped, and so is the whole step when no
	 * factory has been set.
	 */
	@SuppressWarnings({ "unchecked" })
	private void renderCustomFields(HttpRequest request, HttpResponse response, BeanMap beanMap, SpiderBean bean) {
		Set<Field> customFields = ReflectionUtils.getAllFields(bean.getClass(),
				ReflectionUtils.withAnnotation(FieldRenderName.class));
		if (customFields.isEmpty()) {
			return;
		}
		if (customFieldRenderFactory == null) {
			// Without this guard the dereference below throws a NullPointerException
			// that inject() swallows, discarding an otherwise fully rendered bean.
			log.warn("no CustomFieldRenderFactory set, skip custom field render of " + bean.getClass().getName());
			return;
		}
		for (Field customField : customFields) {
			FieldRenderName fieldRender = customField.getAnnotation(FieldRenderName.class);
			String name = fieldRender.value();
			CustomFieldRender customFieldRender = customFieldRenderFactory.getCustomFieldRender(name);
			if (customFieldRender != null) {
				customFieldRender.render(request, response, beanMap, bean, customField);
			}
		}
	}

	/**
	 * Format specific injection, supplied by the concrete render. Called by
	 * {@link #inject} after the request and request-parameter injection.
	 *
	 * @param request  the request being rendered
	 * @param response the response being rendered
	 * @param beanMap  the {@link BeanMap} created for {@code bean}
	 * @param bean     the bean being populated
	 */
	public abstract void fieldRender(HttpRequest request, HttpResponse response, BeanMap beanMap, SpiderBean bean);

	/**
	 * Requests that need to be crawled next. For every field annotated with
	 * {@link Href} whose {@code click()} is {@code true}, the field value (a
	 * single URL string or a list of URL strings) is read from the bean and a
	 * sub-request of {@code request} is handed to {@link DeriveSchedulerContext}
	 * for each non-empty URL.
	 *
	 * @param request the request the sub-requests are created from
	 * @param bean    the already populated bean whose href fields are read
	 */
	@Override
	@SuppressWarnings({ "unchecked" })
	public void requests(HttpRequest request, SpiderBean bean) {
		BeanMap beanMap = BeanMap.create(bean);
		Set<Field> hrefFields = ReflectionUtils.getAllFields(bean.getClass(),
				ReflectionUtils.withAnnotation(Href.class));
		for (Field hrefField : hrefFields) {
			Href href = hrefField.getAnnotation(Href.class);
			if (!href.click()) {
				continue;
			}
			Object o = beanMap.get(hrefField.getName());
			if (o == null) {
				continue;
			}
			// the value is a List type
			boolean isList = ReflectUtils.haveSuperType(o.getClass(), List.class);
			if (isList) {
				List<String> list = (List<String>) o;
				for (String url : list) {
					deriveRequest(request, url);
				}
			} else {
				deriveRequest(request, (String) o);
			}
		}
	}

	/**
	 * Hands a sub-request for {@code url} to {@link DeriveSchedulerContext};
	 * {@code null} and empty URLs are ignored.
	 */
	private void deriveRequest(HttpRequest request, String url) {
		if (StringUtils.isNotEmpty(url)) {
			DeriveSchedulerContext.into(request.subRequest(url));
		}
	}

	/**
	 * Sets the factory used to look up custom field renders by name.
	 *
	 * @param customFieldRenderFactory the factory to use
	 */
	public void setCustomFieldRenderFactory(CustomFieldRenderFactory customFieldRenderFactory) {
		this.customFieldRenderFactory = customFieldRenderFactory;
	}

}