package com.yahoo.glimmer.indexing.preprocessor;
/*
* Copyright (c) 2012 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
* See accompanying LICENSE file.
*/
import java.util.Locale;

import org.semanticweb.yars.nx.Resource;
/**
 * A {@link TupleFilter} that accepts or rejects tuples based on the namespace
 * of their predicate and, for accepted tuples, may rewrite the predicate (or
 * the object of RDF-syntax predicates) by stripping intermediate path
 * segments and lower-casing the result.
 *
 * <p>
 * Decision order in {@link #filter(Tuple)}:
 * <ol>
 * <li>Predicates that are not resources are rejected.</li>
 * <li>Predicates in the RDF Schema or OWL namespaces are accepted untouched.</li>
 * <li>Predicates in the RDF syntax namespace are accepted, unless an rdf type
 * prefix is configured and the object does not start with it. Matching
 * objects are rewritten.</li>
 * <li>Predicates starting with the configured predicate prefix are accepted
 * and rewritten.</li>
 * <li>Everything else is rejected when {@code filterNonMatches} is set,
 * otherwise accepted (and lower-cased when {@code lowercase} is set).</li>
 * </ol>
 *
 * <p>
 * The class keeps no derived state: every field is plain configuration, so an
 * instance behaves the same whether it was built through the constructor and
 * setters or populated reflectively by a deserializer that bypasses the
 * constructor (earlier comments in this file indicate XStream is used that
 * way).
 */
public class PredicatePrefixTupleFilter implements TupleFilter {
    private static final String RDF_SYNTAX_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    private static final String RDF_SCHEMA_NS = "http://www.w3.org/2000/01/rdf-schema#";
    private static final String OWL_NS = "http://www.w3.org/2002/07/owl#";

    /** Prefix a predicate must start with to be rewritten; {@code null} means nothing matches. */
    private String predicatePrefix;
    /** Prefix required of the object of RDF-syntax predicates; {@code null} disables the check. */
    private String rdfTypePrefix;
    /** When true, predicates that match none of the known namespaces/prefixes are rejected. */
    private boolean filterNonMatches;
    /** When true, resources that are not path-stripped are still lower-cased. */
    private boolean lowercase;

    public PredicatePrefixTupleFilter() {
    }

    public void setPredicatePrefix(String predicatePrefix) {
        this.predicatePrefix = predicatePrefix;
    }

    public void setRdfTypePrefix(String rdfTypePrefix) {
        this.rdfTypePrefix = rdfTypePrefix;
    }

    public void setLowercase(boolean lowercase) {
        this.lowercase = lowercase;
    }

    public void setFilterNonMatches(boolean filterNonMatches) {
        this.filterNonMatches = filterNonMatches;
    }

    /**
     * Decides whether the given tuple passes this filter, rewriting its
     * predicate or object in place where applicable.
     *
     * @param tuple
     *            the tuple to test; its predicate/object {@code text} and
     *            {@code n3} fields may be modified.
     * @return {@code true} if the tuple is accepted, {@code false} if it is
     *         rejected (presumably dropped by the caller - confirm against
     *         the {@code TupleFilter} contract).
     */
    @Override
    public boolean filter(Tuple tuple) {
        if (tuple.predicate.type != TupleElement.Type.RESOURCE) {
            return false;
        }

        final String predicateText = tuple.predicate.text;

        if (predicateText.startsWith(RDF_SCHEMA_NS) || predicateText.startsWith(OWL_NS)) {
            return true;
        }

        if (predicateText.startsWith(RDF_SYNTAX_NS)) {
            // NOTE(review): this applies to every predicate in the RDF syntax
            // namespace, not only rdf:type - confirm that is intended.
            if (rdfTypePrefix != null) {
                final String objectText = tuple.object.text;
                if (!objectText.startsWith(rdfTypePrefix)) {
                    return false;
                }
                final String rewritten = rewriteResource(objectText, rdfTypePrefix);
                if (!rewritten.equals(objectText)) {
                    // NOTE(review): a changed value is always lower-cased,
                    // even when the lowercase flag is off. Kept as-is.
                    tuple.object.text = rewritten.toLowerCase(Locale.ROOT);
                    tuple.object.n3 = new Resource(tuple.object.text).toN3();
                }
            }
            return true;
        }

        // A null prefix can never match; guard it instead of letting
        // String.startsWith(null) throw a NullPointerException.
        if (predicatePrefix != null && predicateText.startsWith(predicatePrefix)) {
            final String rewritten = rewriteResource(predicateText, predicatePrefix);
            if (!rewritten.equals(predicateText)) {
                // Same unconditional lower-casing as for objects above.
                tuple.predicate.text = rewritten.toLowerCase(Locale.ROOT);
                tuple.predicate.n3 = new Resource(tuple.predicate.text).toN3();
            }
            return true;
        }

        if (filterNonMatches) {
            return false;
        }

        if (lowercase) {
            final String lowered = predicateText.toLowerCase(Locale.ROOT);
            if (!lowered.equals(predicateText)) {
                tuple.predicate.text = lowered;
                tuple.predicate.n3 = new Resource(lowered).toN3();
            }
        }
        return true;
    }

    /**
     * Collapses a resource URL to {@code removeUrlPrefix} followed by its last
     * path segment, ignoring trailing slashes. For example, with the prefix
     * {@code http://schema.org/} the URL {@code http://schema.org/Person/name}
     * becomes {@code http://schema.org/name}.
     *
     * <p>
     * If there is no intermediate path to remove, the URL is returned as is,
     * or lower-cased when the {@code lowercase} flag is set.
     *
     * @param url
     *            the resource text; callers only pass values that start with
     *            {@code removeUrlPrefix}.
     * @param removeUrlPrefix
     *            the prefix to keep in front of the last path segment.
     * @return the rewritten resource text.
     */
    private String rewriteResource(String url, String removeUrlPrefix) {
        // Index of the last character that is not a trailing '/'. Becomes -1
        // for an empty or all-slash string instead of running off the front.
        int end = url.length() - 1;
        while (end >= 0 && url.charAt(end) == '/') {
            end--;
        }

        // Start of the last path segment. Indexes are taken on the original
        // text (not a lower-cased copy) because that is the string the
        // substring below is cut from. For end == -1 this yields 0, which is
        // never greater than the prefix length.
        final int start = url.lastIndexOf('/', end) + 1;

        if (start > removeUrlPrefix.length()) {
            // There is a path between the prefix and the last segment; drop
            // it. The result is built from the prefix that was passed in, so
            // rdf type objects get the rdf type prefix.
            return removeUrlPrefix + url.substring(start, end + 1);
        }

        return lowercase ? url.toLowerCase(Locale.ROOT) : url;
    }
}