/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.indexing.source.jenatdb;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.commons.io.IOUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.graph.Node;

/**
 * {@link RdfImportFilter} that accepts or rejects triples based on the URI of
 * their predicate. The decision is taken by matching the predicate URI against
 * a configured set of included and excluded URI prefixes (see
 * {@link #PARAM_PROPERTY_FILTERS}). Triples whose predicate is not a URI are
 * always rejected.
 * <p>
 * Decisions are cached per property URI in a plain {@link HashMap}; no
 * synchronization is performed by this class.
 */
public class PropertyPrefixFilter implements RdfImportFilter {

    private final Logger log = LoggerFactory.getLogger(PropertyPrefixFilter.class);

    /**
     * Links to a file that defines included and excluded properties (one per line)<p>
     * <b>Syntax</b>
     * <ul>
     * <li>Lines starting with '#' are ignored
     * <li>'!{prefix}' will exclude all properties starting with the {prefix}.
     * <li>'{prefix}' will include all properties starting with {prefix}
     * <li>'*' will include all properties not explicitly excluded
     * <li> Namespace prefixes are supported!
     * <li> '{prefix}*' is also supported. However all {prefix} values are
     * interpreted like that.
     * </ul>
     * <b>NOTES</b>: (1) Longer prefixes are matched first. (2) All processed
     * values are stored in-memory. That means that matching prefixes are only
     * calculated on the first appearance of a property. (3) Leading and
     * trailing whitespace of every line is ignored.
     */
    public static final String PARAM_PROPERTY_FILTERS = "if-property-filter";

    /**
     * Orders prefixes by descending length (ties broken by natural String
     * order) so that iterating the prefix map tries the longest - and therefore
     * most specific - prefix first.
     */
    private static final Comparator<String> LONGEST_PREFIX_FIRST = new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            // lengths are never negative, so this subtraction can not overflow
            int length = o2.length() - o1.length();
            if(length != 0){
                return length;
            } else {
                return o1.compareTo(o2);
            }
        }
    };

    /**
     * Maps URI prefixes to <code>true</code> (include) or <code>false</code>
     * (exclude). Iteration order is longest prefix first.
     */
    private Map<String, Boolean> propertyPrefixMap;
    /**
     * Cache of the decisions already taken for full property URIs.
     */
    private Map<String, Boolean> propertyMap;
    /**
     * The decision used for properties that match none of the configured
     * prefixes. <code>true</code> if the configuration contained '*'.
     */
    private boolean includeAll;

    /**
     * Default constructor. The filter needs to be configured by calling
     * {@link #setConfiguration(Map)} before it is used.
     */
    public PropertyPrefixFilter(){}

    /**
     * For unit tests only
     * @param nsPrefixService used to resolve namespace prefixes used in the lines
     * @param lines the lines of the property filter configuration. The parsed
     * list is not modified.
     */
    protected PropertyPrefixFilter(NamespacePrefixProvider nsPrefixService, List<String> lines){
        parsePropertyPrefixConfig(nsPrefixService, lines);
    }

    /**
     * Configures this filter. If {@link #PARAM_PROPERTY_FILTERS} is not present
     * in the parsed config all properties are accepted. Otherwise its value is
     * resolved to a config file via the {@link IndexingConfig} stored under
     * {@link IndexingConfig#KEY_INDEXING_CONFIG}, read as UTF-8 and parsed.
     * @param config the configuration
     * @throws IllegalArgumentException if the configured file can not be read
     * or if a namespace prefix used in the file can not be resolved
     */
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        NamespacePrefixService nsPrefixService = indexingConfig.getNamespacePrefixService();
        log.info("Configure {}",getClass().getSimpleName());
        Object value = config.get(PARAM_PROPERTY_FILTERS);
        if(value == null){
            // no filter configured: accept every property
            propertyPrefixMap = Collections.emptyMap();
            propertyMap = Collections.emptyMap();
            includeAll = true;
        } else {
            log.info(" > property Prefix Filters");
            File propertyPrefixConfig = indexingConfig.getConfigFile(value.toString());
            List<String> lines;
            InputStream in = null;
            try {
                in = new FileInputStream(propertyPrefixConfig);
                lines = IOUtils.readLines(in,"UTF-8");
            }catch (IOException e) {
                throw new IllegalArgumentException("Unable to read property filter configuration "
                    + "from the configured File "+propertyPrefixConfig.getAbsolutePath(),e);
            } finally {
                IOUtils.closeQuietly(in);
            }
            parsePropertyPrefixConfig(nsPrefixService, lines);
        }
    }

    /**
     * Parses the lines of a property filter configuration and (re)initialises
     * {@link #propertyPrefixMap}, {@link #propertyMap} and {@link #includeAll}.
     * The parsed list is only read, never modified.
     * @param nsPrefixService used to resolve namespace prefixes to namespaces
     * @param lines the lines of the configuration (see
     * {@link #PARAM_PROPERTY_FILTERS} for the syntax)
     * @throws IllegalArgumentException if a line uses a namespace prefix that
     * can not be resolved by the parsed service
     */
    private void parsePropertyPrefixConfig(NamespacePrefixProvider nsPrefixService, List<String> lines) {
        propertyPrefixMap = new TreeMap<String,Boolean>(LONGEST_PREFIX_FIRST);
        propertyMap = new HashMap<String,Boolean>();
        // '*' needs to be known before the other lines are processed, because
        // it decides if include lines are ignored. The list is scanned instead
        // of calling remove(..) so that unmodifiable lists can be parsed.
        includeAll = false;
        for(String line : lines){
            if("*".equals(line.trim())){
                includeAll = true;
                break;
            }
        }
        log.info(" - includeAll: {}",includeAll);
        for(String rawLine : lines){
            String line = rawLine.trim();
            if(line.isEmpty() || line.startsWith("#") || line.equals("*")){
                continue; //ignore comment, empty lines and multiple '*'
            }
            boolean exclude = line.charAt(0) == '!';
            String prefix = (exclude ? line.substring(1) : line).trim();
            if(includeAll && !exclude){
                continue; //ignore includes if * is active
            }
            if(prefix.isEmpty()){
                // a single '!' defines no prefix that could be excluded
                log.warn(" - ignore line '{}' because it does not define a prefix", rawLine);
                continue;
            }
            String uri;
            String nsPrefix = NamespaceMappingUtils.getPrefix(prefix);
            if(nsPrefix != null){
                String ns = nsPrefixService.getNamespace(nsPrefix);
                if(ns == null){
                    throw new IllegalArgumentException("Unable to resolve namesoace prefix used by '"
                            +prefix+"' by using the NamespacePrefixService!");
                }
                // replace '{nsPrefix}' and the following separator char with the namespace
                uri = new StringBuilder(ns).append(prefix,nsPrefix.length()+1, prefix.length()).toString();
            } else {
                uri = prefix;
            }
            if(uri.endsWith("*")){
                // '{prefix}*' is the same as '{prefix}'
                uri = uri.substring(0, uri.length()-1);
            }
            log.info(" - '{}' {}", uri, exclude ? "excluded" : "included");
            propertyPrefixMap.put(uri, !exclude);
        }
    }

    @Override
    public boolean needsInitialisation() {
        return false;
    }

    @Override
    public void initialise() {
    }

    @Override
    public void close() {
    }

    /**
     * Decides based on the predicate if a triple is accepted.
     * @param s the subject (not used by this filter)
     * @param p the predicate
     * @param o the object (not used by this filter)
     * @return <code>false</code> if the predicate is not a URI. Otherwise the
     * state of the longest configured prefix matching the URI of the predicate
     * or the include-all state if no prefix matches.
     */
    @Override
    public boolean accept(Node s, Node p, Node o) {
        if(!p.isURI()){
            return false;
        }
        if(includeAll && propertyPrefixMap.isEmpty()){
            return true; //nothing is excluded
        }
        String property = p.getURI();
        Boolean state = propertyMap.get(property);
        if(state != null){
            return state;
        }
        //first time we encounter this property ... need to calculate
        for(Entry<String,Boolean> entry : propertyPrefixMap.entrySet()){
            if(property.startsWith(entry.getKey())){
                propertyMap.put(property, entry.getValue());
                return entry.getValue();
            }
        }
        //no match ... set based on includeAll. The returned value needs to be
        //the same as the cached one, otherwise the first and later calls for
        //the same property would answer differently
        propertyMap.put(property, includeAll);
        return includeAll;
    }
}