/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.core.processor;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A simple Processor that allows filtering {@link Representation}s based on the
* {@link Reference#getReference()} values of a configured field.<p>
* Typically used to filter Representations based on their type (rdf:type).<p>
* Parsing '*' as value includes all Representations that are not explicitly
* excluded (if nothing is excluded, filtering is deactivated). If no field is
* configured, the {@link #DEFAULT_FIELD} is used. An invalid filter
* configuration is treated as an error and will cause an
* {@link IllegalArgumentException} during {@link #setConfiguration(Map)}.
* @author Rupert Westenthaler
*
*/
public class FieldValueFilter implements EntityProcessor{

    private static final Logger log = LoggerFactory.getLogger(FieldValueFilter.class);

    /** Configuration key of the field whose values are used for filtering. */
    public static final String PARAM_FIELD = "field";
    /** Configuration key of the include/exclude values. */
    public static final String PARAM_VALUES = "values";
    /** Field used if {@link #PARAM_FIELD} is missing or empty. */
    public static final String DEFAULT_FIELD = "rdf:type";

    /** The (full URI of the) field whose reference values are checked. */
    protected String field;
    /**
     * If <code>true</code> every value that is not contained in
     * {@link #exclude} is accepted; if <code>false</code> only values
     * contained in {@link #included} are accepted.
     */
    protected boolean includeAll = false;
    /** Explicitly included values. The empty String stands for "no value". */
    protected Collection<String> included;
    /** Explicitly excluded values. The empty String stands for "no value". */
    protected Collection<String> exclude;
    /** Used to resolve namespace prefixes of configured fields and values. */
    private NamespacePrefixProvider nsPrefixProvider;

    public FieldValueFilter() {}

    /**
     * Only for unit testing
     * @param nsPrefixProvider used to resolve prefixes used by the parsed
     * field and filter values
     * @param field the field (full URI or prefix:localName)
     * @param filterConfig the filter configuration (same formats as supported
     * for {@link #PARAM_VALUES})
     */
    protected FieldValueFilter(NamespacePrefixProvider nsPrefixProvider, String field, Object filterConfig){
        this.nsPrefixProvider = nsPrefixProvider;
        this.field = getUri(field);
        parseFilterConfig(filterConfig);
    }

    /**
     * Filters the parsed Representation based on the reference values of the
     * configured field.
     * @param source the Representation to check
     * @return the parsed Representation if at least one of its values is
     * accepted (or if it has no values and the empty value is accepted).
     * Otherwise <code>null</code>.
     */
    @Override
    public Representation process(Representation source) {
        if(includeAll && exclude.isEmpty()){
            return source; //filter inactive
        }
        Iterator<Reference> refs = source.getReferences(field);
        if(!refs.hasNext()){
            //no values: the decision depends on how the empty value is configured
            return isAccepted("") ? source : null;
        }
        while(refs.hasNext()){
            //a single accepted value is sufficient to keep the Representation
            if(isAccepted(refs.next().getReference())){
                return source;
            }
        }
        //not found -> filter
        return null;
    }

    /**
     * Checks a single value against the current configuration.
     * @param value the value ("" for "no value")
     * @return <code>true</code> if {@link #includeAll} is set and the value is
     * not excluded, or if {@link #includeAll} is not set and the value is
     * explicitly included.
     */
    private boolean isAccepted(String value){
        return includeAll ? !exclude.contains(value) : included.contains(value);
    }

    @Override
    public void close() {
    }

    @Override
    public void initialise() {
    }

    @Override
    public boolean needsInitialisation() {
        return false;
    }

    /**
     * Reads the {@link IndexingConfig}, the field ({@link #PARAM_FIELD},
     * defaults to {@link #DEFAULT_FIELD}) and the filter values
     * ({@link #PARAM_VALUES}) from the parsed configuration.
     * @param config the configuration
     * @throws IllegalArgumentException if the {@link IndexingConfig} is
     * missing, a used namespace prefix can not be resolved or the filter
     * values are invalid
     */
    @Override
    public void setConfiguration(Map<String,Object> config) {
        log.info("> configure {}",getClass().getSimpleName());
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        if(indexingConfig == null){
            //fail with a descriptive message instead of a NullPointerException
            throw new IllegalArgumentException("The parsed configuration does not contain the "
                + "IndexingConfig (key: '" + IndexingConfig.KEY_INDEXING_CONFIG + "')!");
        }
        nsPrefixProvider = indexingConfig.getNamespacePrefixService();
        Object fieldConfig = config.get(PARAM_FIELD);
        if(fieldConfig == null || fieldConfig.toString().isEmpty()){
            this.field = getUri(DEFAULT_FIELD);
        } else {
            this.field = getUri(fieldConfig.toString());
        }
        log.info(" - field: {}",field);
        log.info(" - filters:");
        parseFilterConfig(config.get(PARAM_VALUES));
    }

    /**
     * Parses the filter configuration and sets {@link #includeAll},
     * {@link #included} and {@link #exclude} accordingly. The fields are only
     * updated after the whole configuration was parsed successfully, so state
     * of a previous configuration is neither mixed with the new one nor left
     * half-updated on errors.<p>
     * Supported entries: <ul>
     * <li> <code>*</code>: include all values that are not excluded
     * <li> <code>null</code> (case insensitive), the empty String or a
     * <code>null</code> element: stands for "no value"
     * <li> <code>!{value}</code>: exclude the value
     * <li> any other entry: include the value
     * </ul>
     * If only exclusions are configured, include all is activated implicitly.
     * @param value the configuration: a ';' separated String, a String[], a
     * Collection (elements are converted by using <code>toString()</code>)
     * or <code>null</code> (accept everything)
     * @throws IllegalArgumentException if the parsed value is of an
     * unsupported type, contains '!*', includes and excludes the same value
     * or uses a namespace prefix that can not be resolved
     */
    private void parseFilterConfig(Object value) {
        Collection<?> values;
        if(value == null){ // no values (accept all entities with any value)
            values = Collections.emptySet();
        } else if(value instanceof String){
            values = Arrays.asList(value.toString().split(";"));
        } else if(value instanceof String[]){
            values = Arrays.asList((String[])value);
        } else if(value instanceof Collection<?>){
            values = (Collection<?>)value;
        } else {
            throw new IllegalArgumentException("Parameter '" + PARAM_VALUES
                + "' must be of type String, String[] or Collection<String> (present: "
                + value.getClass()+")!");
        }
        if(values.isEmpty()){
            //do not keep a reference to the (possible mutable) parsed collection
            this.includeAll = true;
            this.included = Collections.emptySet();
            this.exclude = Collections.emptySet();
            return;
        }
        //collect in local variables so that includeAll does not leak in from
        //an earlier configuration
        boolean all = false;
        Collection<String> includes = new HashSet<String>();
        Collection<String> excludes = new HashSet<String>();
        for(Object rawEntry : values) {
            //NULL is a valid option, but we use "" instead
            String entry = rawEntry == null ? "" : rawEntry.toString().trim();
            boolean negated = !entry.isEmpty() && entry.charAt(0) == '!';
            if(negated){
                //trim again to support whitespace after the '!'
                entry = entry.substring(1).trim();
            }
            if(entry.equalsIgnoreCase("null")){
                entry = "";
            }
            if(entry.equals("*")){
                if(negated){
                    throw new IllegalArgumentException("'!*' is not allowed in the config ("
                        + "it is the default if '*' is not present)!");
                }
                if(!all){ //repeated '*' entries are ignored
                    log.info(" - includeAll");
                    all = true;
                }
                continue;
            }
            String uri = getUri(entry);
            if((negated ? includes : excludes).contains(uri)){
                throw new IllegalArgumentException("'"+entry+"' both included and excluded by the "
                    + "parsed configuration!");
            }
            (negated ? excludes : includes).add(uri);
            log.info(" - {} {}",negated ? "exclude" : "include", uri.isEmpty() ? "<empty>" : uri);
        }
        //if only excludes are configured add the include all
        if(!all && !excludes.isEmpty() && includes.isEmpty()){
            log.info(" - includeAll (because only exclusions are configured)");
            all = true;
        }
        this.includeAll = all;
        this.included = includes;
        this.exclude = excludes;
    }

    /**
     * Converts the parsed entry to a full URI. If
     * {@link NamespaceMappingUtils#getPrefix(String)} reports a prefix for the
     * entry, the prefix (and the following separator char) is replaced by the
     * namespace registered for it. Otherwise the entry is returned unchanged.
     * @param entry the entry
     * @return the entry with the prefix replaced by its namespace
     * @throws IllegalArgumentException if the prefix can not be resolved
     * (this includes the case that no NamespacePrefixProvider is available)
     */
    private String getUri(String entry) {
        String nsPrefix = NamespaceMappingUtils.getPrefix(entry);
        if(nsPrefix == null){
            return entry;
        }
        String ns = nsPrefixProvider == null ? null : nsPrefixProvider.getNamespace(nsPrefix);
        if(ns == null){
            throw new IllegalArgumentException("Unable to resolve namespace prefix used by '"
                +entry+"' by using the NamespacePrefixService!");
        }
        //skip the prefix and the separator following it
        return ns + entry.substring(nsPrefix.length()+1);
    }
}