package us.codecraft.webmagic.model.annotation; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.Target; /** * Define the extractor for field or class.<br> * * @author code4crafter@gmail.com <br> * @since 0.2.0 */ @Retention(java.lang.annotation.RetentionPolicy.RUNTIME) @Target({ElementType.FIELD, ElementType.TYPE}) public @interface ExtractBy { /** * Extractor expression, support XPath, CSS Selector and regex. * * @return extractor expression */ String value(); /** * types of extractor expressions */ public static enum Type {XPath, Regex, Css, JsonPath} /** * Extractor type, support XPath, CSS Selector and regex. * * @return extractor type */ Type type() default Type.XPath; /** * Define whether the field can be null.<br> * If set to 'true' and the extractor get no result, the entire class will be discarded. <br> * * @return whether the field can be null */ boolean notNull() default false; /** * types of source for extracting. */ public static enum Source { /** * extract from the content extracted by class extractor */ SelectedHtml, /** * extract from the raw html */ RawHtml, RawText } /** * The source for extracting. <br> * It works only if you already added 'ExtractBy' to Class. <br> * * @return the source for extracting */ Source source() default Source.SelectedHtml; /** * Define whether the extractor return more than one result. * When set to 'true', the extractor return a list of string (so you should define the field as List). <br> * * Deprecated since 0.4.2. This option is determined automatically by the class of field. * @deprecated since 0.4.2 * @return whether the extractor return more than one result */ boolean multi() default false; }