/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2010-2015 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.web.database;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringEscapeUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.EnumerationUtils;
import com.jaeksoft.searchlib.util.XmlWriter;
/**
 * A named URL filter: a case-insensitive regular expression, an optional set
 * of hostnames restricting where it applies, and a {@link Type}.
 * <p>
 * The filter can be loaded from a DOM node, written back with
 * {@link #writeXml(XmlWriter)}, and evaluated against URL parts with
 * {@link #isReplacePart(String, String)} and
 * {@link #doReplaceQuery(String, String[])}. Instances are ordered by name.
 * <p>
 * No hostname restriction is in effect when the hostname set is {@code null}
 * or empty.
 */
public class UrlFilterItem implements Comparable<UrlFilterItem> {

	/**
	 * The kind of filter. This class only stores and serializes the value.
	 */
	public enum Type {

		QUERY, FRAGMENT;

		/**
		 * Resolves a type from its textual form by delegating to
		 * {@code EnumerationUtils.lookup}, passing {@link #QUERY} as the
		 * third argument (presumably the fallback for unknown values).
		 * 
		 * @param type
		 *            the textual form of the type
		 * @return the resolved type
		 */
		public final static Type find(String type) {
			return EnumerationUtils.lookup(Type.class, type, QUERY);
		}
	}

	/** Compiled form of {@link #pattern}; null when the pattern is unset. */
	private transient Pattern compiledPattern;

	private String name;

	private String pattern;

	/** Hostnames the filter is restricted to; null until one is added. */
	private Set<String> hostnames;

	private Type type;

	private UrlFilterItem() {
		compiledPattern = null;
		name = null;
		pattern = null;
		hostnames = null;
		type = Type.QUERY;
	}

	/**
	 * Creates a {@link Type#QUERY} filter without hostname restriction.
	 * 
	 * @param name
	 *            the name of the filter
	 * @param pattern
	 *            the regular expression, may be null or empty
	 */
	public UrlFilterItem(String name, String pattern) {
		this();
		setName(name);
		setPattern(pattern);
	}

	/**
	 * Builds a filter from a DOM node. The "name" and "type" attributes are
	 * read from the node. The pattern is taken from the first "pattern" child
	 * if there is one, otherwise from the text of the node itself. Every
	 * "hostname" child is added to the hostname set.
	 * 
	 * @param node
	 *            the node describing the filter
	 */
	public UrlFilterItem(Node node) {
		this();
		setName(DomUtils.getAttributeText(node, "name"));
		setType(Type.find(DomUtils.getAttributeText(node, "type")));
		List<Node> patternNodes = DomUtils.getNodes(node, "pattern");
		// When there is no <pattern> child, the pattern is the text of the
		// node itself.
		Node patternSource = patternNodes != null && patternNodes.size() > 0 ? patternNodes
				.get(0) : node;
		setPattern(StringEscapeUtils.unescapeXml(DomUtils
				.getText(patternSource)));
		List<Node> hostnameNodes = DomUtils.getNodes(node, "hostname");
		if (hostnameNodes != null) {
			for (Node hostnameNode : hostnameNodes) {
				addHostname(StringEscapeUtils.unescapeXml(DomUtils
						.getText(hostnameNode)));
			}
		}
	}

	/**
	 * Copies every field of this filter into the given one. The hostname set
	 * is duplicated, so the two filters do not share it afterwards.
	 * 
	 * @param filter
	 *            the filter to overwrite
	 */
	public void copyTo(UrlFilterItem filter) {
		filter.name = this.name;
		filter.pattern = this.pattern;
		filter.compiledPattern = this.compiledPattern;
		filter.hostnames = this.hostnames == null ? null : new TreeSet<String>(
				this.hostnames);
		filter.type = this.type;
	}

	/**
	 * @return the name
	 */
	public String getName() {
		return name;
	}

	/**
	 * @param name
	 *            the name to set
	 */
	public void setName(String name) {
		this.name = name;
	}

	/**
	 * @return the type
	 */
	public Type getType() {
		return type;
	}

	/**
	 * @param type
	 *            the type to set, null is replaced by {@link Type#QUERY}
	 */
	public void setType(Type type) {
		this.type = type == null ? Type.QUERY : type;
	}

	/**
	 * Compiles a pattern in case-insensitive mode.
	 * 
	 * @param pattern
	 *            the regular expression, may be null or empty
	 * @return the compiled pattern, or null if the given text is null or
	 *         empty
	 */
	private static Pattern compile(String pattern) {
		if (pattern == null || pattern.length() == 0)
			return null;
		return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
	}

	/**
	 * @return the pattern
	 */
	public String getPattern() {
		return pattern;
	}

	/**
	 * Sets the regular expression and its compiled form.
	 * 
	 * @param pattern
	 *            the pattern to set, null or empty disables the filter
	 * @throws java.util.regex.PatternSyntaxException
	 *             if the expression is invalid; the previous pattern is then
	 *             left untouched
	 */
	public void setPattern(String pattern) {
		// Compile before assigning, so that a syntax error cannot leave the
		// text and the compiled pattern out of sync.
		Pattern compiled = compile(pattern);
		this.pattern = pattern;
		this.compiledPattern = compiled;
	}

	/**
	 * @return the internal hostname set (not a copy), or null if no hostname
	 *         was ever added
	 */
	public Set<String> getHostnameSet() {
		return hostnames;
	}

	/**
	 * @return a new list holding the hostnames, or null if no hostname was
	 *         ever added
	 */
	public List<String> getHostnameList() {
		if (hostnames == null)
			return null;
		return new ArrayList<String>(hostnames);
	}

	/**
	 * Adds a hostname to the restriction set. Null and empty values are
	 * ignored.
	 * 
	 * @param hostname
	 *            the hostname to add
	 */
	public void addHostname(String hostname) {
		if (hostname == null || hostname.length() == 0)
			return;
		if (hostnames == null)
			hostnames = new TreeSet<String>();
		hostnames.add(hostname);
	}

	/**
	 * Removes a hostname from the restriction set, if present.
	 * 
	 * @param hostname
	 *            the hostname to remove
	 */
	public void removeHostname(String hostname) {
		if (hostnames == null)
			return;
		hostnames.remove(hostname);
	}

	/**
	 * @return true if at least one hostname restricts this filter
	 */
	public boolean isHostnames() {
		return hostnames != null && hostnames.size() > 0;
	}

	/**
	 * Tells if the filter applies to the given hostname.
	 * 
	 * @param hostname
	 *            the hostname to check
	 * @return true if there is no hostname restriction, or if the hostname is
	 *         part of the restriction set
	 */
	public boolean hostnameCheck(String hostname) {
		// An empty set (left behind after removing the last hostname) means
		// "no restriction", exactly like a null set. This keeps the check
		// consistent with isHostnames() and with writeXml(), which emits no
		// <hostname> element in that case.
		if (!isHostnames())
			return true;
		return hostnames.contains(hostname);
	}

	/**
	 * Writes the filter as an "urlFilter" element with "name" and "type"
	 * attributes, a "pattern" child and one "hostname" child per hostname.
	 * 
	 * @param xmlWriter
	 *            the writer receiving the elements
	 * @throws SAXException
	 *             if the writer fails
	 */
	public void writeXml(XmlWriter xmlWriter) throws SAXException {
		xmlWriter.startElement("urlFilter", "name", name, "type", type.name());
		xmlWriter.startElement("pattern");
		xmlWriter.textNode(pattern);
		xmlWriter.endElement();
		if (hostnames != null) {
			for (String hostname : hostnames) {
				xmlWriter.startElement("hostname");
				xmlWriter.textNode(hostname);
				xmlWriter.endElement();
			}
		}
		xmlWriter.endElement();
	}

	/**
	 * Orders filters by name. A null name sorts before any non-null name.
	 */
	@Override
	public int compareTo(UrlFilterItem o) {
		// The name may be null (e.g. missing attribute in the DOM node).
		if (this.name == null)
			return o.name == null ? 0 : -1;
		if (o.name == null)
			return 1;
		return this.name.compareTo(o.name);
	}

	/**
	 * Tells if the given part is entirely matched by the pattern.
	 * 
	 * @param hostname
	 *            the hostname the part comes from
	 * @param part
	 *            the text to test, may be null
	 * @return true if the filter has a pattern, applies to the hostname, and
	 *         the pattern matches the whole part
	 */
	public final boolean isReplacePart(String hostname, String part) {
		if (compiledPattern == null || part == null)
			return false;
		if (!hostnameCheck(hostname))
			return false;
		return compiledPattern.matcher(part).matches();
	}

	/**
	 * Sets to null, in place, every entry of the array that is entirely
	 * matched by the pattern. Nothing happens if the filter has no pattern,
	 * if the array is null, or if the filter does not apply to the hostname.
	 * 
	 * @param hostname
	 *            the hostname the parts come from
	 * @param queryParts
	 *            the parts to filter, modified in place
	 */
	public final void doReplaceQuery(String hostname, String[] queryParts) {
		if (compiledPattern == null || queryParts == null)
			return;
		if (!hostnameCheck(hostname))
			return;
		for (int i = 0; i < queryParts.length; i++) {
			String queryPart = queryParts[i];
			if (queryPart != null
					&& compiledPattern.matcher(queryPart).matches()) {
				queryParts[i] = null;
			}
		}
	}
}