package com.flicklib.tools; import java.util.HashSet; import java.util.Set; import net.htmlparser.jericho.Segment; import net.htmlparser.jericho.StartTag; import net.htmlparser.jericho.TextExtractor; /** * * @author zsombor * */ public class AdvancedTextExtractor extends TextExtractor { private final boolean defaultDecision; private final Set<String> allowedTagNames = new HashSet<String>(); private final Set<String> excludedTagNames = new HashSet<String>(); public AdvancedTextExtractor(Segment segment, boolean defaultDecision) { super(segment); this.defaultDecision = defaultDecision; } public AdvancedTextExtractor addAllowedTagName(String name) { this.allowedTagNames.add(name.toLowerCase()); return this; } public AdvancedTextExtractor addExcludedTagName(String name) { this.excludedTagNames.add(name.toLowerCase()); return this; } @Override public boolean excludeElement(StartTag startTag) { String name = startTag.getName(); if (allowedTagNames.contains(name)) { return false; } if (excludedTagNames.contains(name)) { return true; } return defaultDecision; } }