/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.sparql.engine.optimizer.reorder; import org.apache.jena.sparql.engine.optimizer.Pattern ; import org.apache.jena.sparql.engine.optimizer.StatsMatcher ; import org.apache.jena.sparql.engine.optimizer.reorder.PatternTriple ; import org.apache.jena.sparql.engine.optimizer.reorder.ReorderTransformationSubstitution ; import org.apache.jena.sparql.graph.NodeConst ; import org.apache.jena.sparql.sse.Item ; import static org.apache.jena.sparql.engine.optimizer.reorder.PatternElements.* ; /** Fixed scheme for choosing based on the triple patterns, without * looking at the data. It gives a weight to a triple, with more grounded terms * being considered better. It weights against rdf:type because that can be * very unselective (e.g. ?x rdf:type rdf:Resource) */ public class ReorderFixed extends ReorderTransformationSubstitution { /* * How it works: * * Choose the 'best' pattern, propagate the fact that variables are now * bound (ReorderTransformationSubstitution) then chooses the next triple * pattern. * * Instead of just one set of rules, we make an exception is rdf:type. ?x * rdf:type :T or ?x rdf:type ?type are often very much less selective and * we want to give them special, poorer weighings. We do this by controlling * the order of matching: first check to see if it's rdf;type in the * predicate position, then apply the appropriate matcher. * * If we just used a single StatsMatcher with all the rules, the * VAR/TERM/TERM or VAR/TERM/VAR rules match rdf:type with lower weightings. * * The relative order of equal weightings is not changed. * * There are two StatsMatchers: 'matcher' and 'matcherRdfType' * applied for the normal case and the rdf:type case. */ public ReorderFixed() {} private static Item type = Item.createNode(NodeConst.nodeRDFType) ; /** The number of triples used for the base scale */ public static final int MultiTermSampleSize = 100 ; // Used for general patterns private final static StatsMatcher matcher = new StatsMatcher() ; // Used to override choices made by the matcher above. private final static StatsMatcher matcherRdfType = new StatsMatcher() ; static { init() ; } private static void init() { //ype = Item.createNode(NodeConst.nodeRDFType) ; // rdf:type can be a bad choice e.g rdf:type rdf:Resource // with inference enabled. // Weight use of rdf:type worse then the general pattern // that would also match by using two matchers. // Numbers chosen as an approximation ratios for a graph of 100 triples // 1 : TERM type TERM is builtin (SPO). // matcherRdfType.addPattern(new Pattern(1, TERM, TERM, TERM)) ; matcherRdfType.addPattern(new Pattern(5, VAR, type, TERM)) ; matcherRdfType.addPattern(new Pattern(50, VAR, type, VAR)) ; // SPO - built-in - not needed as a rule // matcher.addPattern(new Pattern(1, TERM, TERM, TERM)) ; matcher.addPattern(new Pattern(2, TERM, TERM, VAR)) ; // SP? matcher.addPattern(new Pattern(3, VAR, TERM, TERM)) ; // ?PO matcher.addPattern(new Pattern(2, TERM, TERM, TERM)) ; // S?O matcher.addPattern(new Pattern(10, TERM, VAR, VAR)) ; // S?? matcher.addPattern(new Pattern(20, VAR, VAR, TERM)) ; // ??O matcher.addPattern(new Pattern(30, VAR, TERM, VAR)) ; // ?P? matcher.addPattern(new Pattern(MultiTermSampleSize, VAR, VAR, VAR)) ; // ??? } @Override public double weight(PatternTriple pt) { // Special case rdf:type first to make it lower(worse) than // VAR, TERM, TERM which would otherwise be used. if ( type.equals(pt.predicate) ) { double w = matcherRdfType.match(pt) ; if ( w > 0 ) return w ; } return matcher.match(pt) ; } }