ReorderFixed.java example

Explorer
jena-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.sparql.engine.optimizer.reorder;

import org.apache.jena.sparql.engine.optimizer.Pattern ;
import org.apache.jena.sparql.engine.optimizer.StatsMatcher ;
import org.apache.jena.sparql.engine.optimizer.reorder.PatternTriple ;
import org.apache.jena.sparql.engine.optimizer.reorder.ReorderTransformationSubstitution ;
import org.apache.jena.sparql.graph.NodeConst ;
import org.apache.jena.sparql.sse.Item ;
import static org.apache.jena.sparql.engine.optimizer.reorder.PatternElements.* ;

/** Fixed scheme for choosing based on the triple patterns, without
 *  looking at the data.  It gives a weight to a triple, with more grounded terms
 *  being considered better.  It weights against rdf:type because that can be 
 *  very unselective (e.g. ?x rdf:type rdf:Resource)
 */
public class ReorderFixed extends ReorderTransformationSubstitution {
    /*
     * How it works:
     * 
     * Choose the 'best' pattern, propagate the fact that variables are now
     * bound (ReorderTransformationSubstitution), then choose the next triple
     * pattern.
     * 
     * Instead of just one set of rules, we make an exception for rdf:type.
     * ?x rdf:type :T or ?x rdf:type ?type are often very much less selective
     * and we want to give them special, poorer weightings. We do this by
     * controlling the order of matching: first check to see if it's rdf:type
     * in the predicate position, then apply the appropriate matcher.
     * 
     * If we just used a single StatsMatcher with all the rules, the
     * VAR/TERM/TERM or VAR/TERM/VAR rules would match rdf:type with their
     * lower (i.e. better) weightings.
     * 
     * The relative order of equal weightings is not changed.
     * 
     * There are two StatsMatchers: 'matcher' and 'matcherRdfType',
     * applied for the normal case and the rdf:type case respectively.
     */

    /** Creates a reorder transformation; all rules are static and shared. */
    public ReorderFixed() {}

    /** The rdf:type node wrapped as an Item, used to detect rdf:type in the predicate position. */
    private static final Item         type                = Item.createNode(NodeConst.nodeRDFType) ;

    /** The number of triples used for the base scale */
    public static final int           MultiTermSampleSize = 100 ;

    // Used for general patterns
    private final static StatsMatcher matcher             = new StatsMatcher() ;
    // Used to override choices made by the matcher above.
    private final static StatsMatcher matcherRdfType      = new StatsMatcher() ;

    // Must come after the field initializers above: init() uses 'type' and both matchers.
    static { init() ; }

    /**
     * Registers the fixed weighting rules with the two matchers.
     * A larger weight means a less attractive (less selective) pattern.
     */
    private static void init() {
        // rdf:type can be a bad choice e.g. rdf:type rdf:Resource
        // with inference enabled.
        // Weight use of rdf:type worse than the general pattern
        // that would also match, by using two matchers.

        // Numbers chosen as approximate ratios for a graph of 100 triples.

        // 1 : TERM type TERM is built-in (SPO).
        // matcherRdfType.addPattern(new Pattern(1, TERM, TERM, TERM)) ;
        matcherRdfType.addPattern(new Pattern(5, VAR, type, TERM)) ;
        matcherRdfType.addPattern(new Pattern(50, VAR, type, VAR)) ;

        // SPO - built-in - not needed as a rule
        // matcher.addPattern(new Pattern(1, TERM, TERM, TERM)) ;

        // Two ground terms.
        matcher.addPattern(new Pattern(2, TERM, TERM, VAR)) ;                   // SP?
        matcher.addPattern(new Pattern(3, VAR, TERM, TERM)) ;                   // ?PO
        // Fixed: this rule was (TERM, TERM, TERM), which duplicated the built-in
        // SPO case and left S?O without any rule.
        matcher.addPattern(new Pattern(2, TERM, VAR, TERM)) ;                   // S?O

        // One ground term.
        matcher.addPattern(new Pattern(10, TERM, VAR, VAR)) ;                   // S??
        matcher.addPattern(new Pattern(20, VAR, VAR, TERM)) ;                   // ??O
        matcher.addPattern(new Pattern(30, VAR, TERM, VAR)) ;                   // ?P?

        // No ground terms: the base scale.
        matcher.addPattern(new Pattern(MultiTermSampleSize, VAR, VAR, VAR)) ;   // ???
    }

    /**
     * Computes the weight of a triple pattern from the fixed rules.
     * <p>
     * If the predicate is rdf:type, the rdf:type matcher is consulted first and
     * its result is returned when it is positive. Otherwise (predicate is not
     * rdf:type, or the rdf:type matcher returned a non-positive value) the
     * result of the general matcher is returned.
     *
     * @param pt the triple pattern to weigh
     * @return the weight from the rdf:type matcher when applicable and positive,
     *         else whatever the general matcher returns for {@code pt}
     */
    @Override
    public double weight(PatternTriple pt) {
        // Special case rdf:type first so that it is ranked worse (a larger
        // weight) than VAR, TERM, TERM / VAR, TERM, VAR which would otherwise be used.
        if ( type.equals(pt.predicate) ) {
            double w = matcherRdfType.match(pt) ;
            // NOTE(review): a non-positive result presumably means "no rule
            // matched" - confirm against StatsMatcher. Fall through in that case.
            if ( w > 0 )
                return w ;
        }
        return matcher.match(pt) ;
    }
}